From 4d7dc8dbb93a34f8d68a1413f930080e1c467839 Mon Sep 17 00:00:00 2001 From: Zhekun Zhang Date: Mon, 22 Jan 2024 20:40:50 +0000 Subject: [PATCH 1/9] squash --- .../HloToByreTensor/HloToByreCustom.h | 66 + .../include/byteir/Dialect/Byre/ByreOps.td | 37 + .../Conversion/HloToByreTensor/CMakeLists.txt | 1 + .../HloToByreTensor/HloToByreCustom.cpp | 275 + compiler/lib/Dialect/Byre/IR/ByreDialect.cpp | 8 + compiler/lib/Pipelines/ByreTensorOpt.cpp | 5 + external_libs/external/cutlass | 1 + external_libs/external/half/LICENSE.txt | 21 + external_libs/external/half/README.txt | 317 ++ .../external/half/include/half/half.hpp | 4601 +++++++++++++++++ external_libs/runtime/CMakeLists.txt | 16 + external_libs/runtime/README.md | 14 + .../runtime/flash_attn/CMakeLists.txt | 1 + .../runtime/flash_attn/include/flash_api.h | 95 + .../runtime/flash_attn/lib/CMakeLists.txt | 40 + external_libs/runtime/flash_attn/lib/alibi.h | 62 + .../runtime/flash_attn/lib/block_info.h | 46 + .../runtime/flash_attn/lib}/flash.h | 54 +- .../runtime/flash_attn/lib/flash_api.cu | 782 +++ .../lib/flash_bwd_hdim128_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim128_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim160_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim160_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim192_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim192_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim224_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim224_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim256_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim256_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim32_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim32_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim64_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim64_fp16_sm80.cu | 10 + .../lib/flash_bwd_hdim96_bf16_sm80.cu | 10 + .../lib/flash_bwd_hdim96_fp16_sm80.cu | 10 + .../runtime/flash_attn/lib/flash_bwd_kernel.h | 1639 ++++++ .../lib/flash_bwd_launch_template.h | 364 ++ .../lib/flash_fwd_hdim128_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim128_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim160_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim160_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim192_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim192_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim224_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim224_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim256_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim256_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim32_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim32_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim64_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim64_fp16_sm80.cu | 10 + .../lib/flash_fwd_hdim96_bf16_sm80.cu | 10 + .../lib/flash_fwd_hdim96_fp16_sm80.cu | 10 + .../runtime/flash_attn/lib/flash_fwd_kernel.h | 1768 +++++++ .../lib/flash_fwd_launch_template.h | 356 ++ .../lib/flash_fwd_split_hdim128_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim128_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim160_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim160_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim192_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim192_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim224_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim224_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim256_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim256_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim32_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim32_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim64_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim64_fp16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim96_bf16_sm80.cu | 7 + .../lib/flash_fwd_split_hdim96_fp16_sm80.cu | 7 + 
.../runtime/flash_attn/lib/kernel_traits.h | 397 ++ .../flash_attn/lib/kernel_traits_sm90.h | 159 + .../runtime/flash_attn/lib}/philox.cuh | 7 +- .../runtime/flash_attn/lib/softmax.h | 283 + .../runtime/flash_attn/lib}/static_switch.h | 0 external_libs/runtime/flash_attn/lib/utils.h | 521 ++ runtime/cmake/CMakeLists.txt | 27 - runtime/cmake/brt_provider_cuda.cmake | 42 - .../{flash_attn => custom}/op_registration.h | 2 +- .../include/brt/core/framework/op_accessor.h | 2 + .../cuda/providers/default/cuda_provider.cc | 9 +- .../cuda/providers/default/custom/custom.cc | 87 + .../flash_attn_bwd.h => custom/custom.h} | 17 +- .../{flash_attn => custom}/op_registration.cc | 18 +- .../default/flash_attn/flash_attn_bwd.cc | 299 -- .../default/flash_attn/flash_attn_fwd.cc | 211 - .../default/flash_attn/flash_attn_fwd.h | 33 - .../default/flash_attn/kernels/block_info.h | 56 - .../default/flash_attn/kernels/flash_api.cu | 335 -- .../default/flash_attn/kernels/flash_api.h | 60 - .../kernels/flash_bwd_hdim128_bf16_sm80.cu | 31 - .../kernels/flash_bwd_hdim128_fp16_sm80.cu | 42 - .../kernels/flash_bwd_hdim160_bf16_sm80.cu | 18 - .../kernels/flash_bwd_hdim160_fp16_sm80.cu | 18 - .../kernels/flash_bwd_hdim192_bf16_sm80.cu | 18 - .../kernels/flash_bwd_hdim192_fp16_sm80.cu | 18 - .../kernels/flash_bwd_hdim224_bf16_sm80.cu | 18 - .../kernels/flash_bwd_hdim224_fp16_sm80.cu | 18 - .../kernels/flash_bwd_hdim256_bf16_sm80.cu | 18 - .../kernels/flash_bwd_hdim256_fp16_sm80.cu | 18 - .../kernels/flash_bwd_hdim32_bf16_sm80.cu | 27 - .../kernels/flash_bwd_hdim32_fp16_sm80.cu | 26 - .../kernels/flash_bwd_hdim64_bf16_sm80.cu | 26 - .../kernels/flash_bwd_hdim64_fp16_sm80.cu | 61 - .../kernels/flash_bwd_hdim96_bf16_sm80.cu | 31 - .../kernels/flash_bwd_hdim96_fp16_sm80.cu | 34 - .../flash_attn/kernels/flash_bwd_kernel.h | 2004 ------- .../kernels/flash_bwd_launch_template.h | 501 -- .../kernels/flash_fwd_hdim128_bf16_sm80.cu | 29 - .../kernels/flash_fwd_hdim128_fp16_sm80.cu | 50 - .../kernels/flash_fwd_hdim160_bf16_sm80.cu | 26 - .../kernels/flash_fwd_hdim160_fp16_sm80.cu | 44 - .../kernels/flash_fwd_hdim192_bf16_sm80.cu | 26 - .../kernels/flash_fwd_hdim192_fp16_sm80.cu | 44 - .../kernels/flash_fwd_hdim224_bf16_sm80.cu | 17 - .../kernels/flash_fwd_hdim224_fp16_sm80.cu | 17 - .../kernels/flash_fwd_hdim256_bf16_sm80.cu | 17 - .../kernels/flash_fwd_hdim256_fp16_sm80.cu | 17 - .../kernels/flash_fwd_hdim32_bf16_sm80.cu | 17 - .../kernels/flash_fwd_hdim32_fp16_sm80.cu | 35 - .../kernels/flash_fwd_hdim64_bf16_sm80.cu | 29 - .../kernels/flash_fwd_hdim64_fp16_sm80.cu | 40 - .../kernels/flash_fwd_hdim96_bf16_sm80.cu | 26 - .../kernels/flash_fwd_hdim96_fp16_sm80.cu | 38 - .../flash_attn/kernels/flash_fwd_kernel.h | 732 --- .../kernels/flash_fwd_launch_template.h | 343 -- .../flash_attn/kernels/kernel_traits.h | 392 -- .../flash_attn/kernels/kernel_traits_sm90.h | 169 - .../default/flash_attn/kernels/softmax.h | 332 -- .../default/flash_attn/kernels/utils.h | 433 -- runtime/lib/core/framework/op_accessor.cc | 32 + .../default/kernel/flash_attn_bwd_test.cc | 4 - .../default/kernel/flash_attn_fwd_test.cc | 204 +- runtime/test/test_files/flash_attn_bwd.mlir | 6 +- runtime/test/test_files/flash_attn_fwd.mlir | 2 +- .../test/test_files/flash_attn_kvcache.mlir | 13 + ...ash_attn_kvcache_inputs_cache_seqlens.data | 1 + .../flash_attn_kvcache_inputs_k.data | 1 + .../flash_attn_kvcache_inputs_kcache.data | 1 + .../flash_attn_kvcache_inputs_q.data | 1 + .../flash_attn_kvcache_inputs_v.data | 1 + .../flash_attn_kvcache_inputs_vcache.data | 1 + 
.../flash_attn_kvcache_outputs.data | 1 + .../flash_attn_kvcache_outputs_kcache.data | 1 + .../flash_attn_kvcache_outputs_vcache.data | 1 + .../generate_flash_attn_ground_truth.py | 142 +- scripts/e2e/build_and_test_e2e.sh | 2 +- scripts/runtime/build_and_test.sh | 6 - 149 files changed, 12822 insertions(+), 6964 deletions(-) create mode 100644 compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h create mode 100644 compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp create mode 160000 external_libs/external/cutlass create mode 100644 external_libs/external/half/LICENSE.txt create mode 100644 external_libs/external/half/README.txt create mode 100644 external_libs/external/half/include/half/half.hpp create mode 100644 external_libs/runtime/CMakeLists.txt create mode 100644 external_libs/runtime/README.md create mode 100644 external_libs/runtime/flash_attn/CMakeLists.txt create mode 100644 external_libs/runtime/flash_attn/include/flash_api.h create mode 100644 external_libs/runtime/flash_attn/lib/CMakeLists.txt create mode 100644 external_libs/runtime/flash_attn/lib/alibi.h create mode 100644 external_libs/runtime/flash_attn/lib/block_info.h rename {runtime/lib/backends/cuda/providers/default/flash_attn/kernels => external_libs/runtime/flash_attn/lib}/flash.h (73%) create mode 100644 external_libs/runtime/flash_attn/lib/flash_api.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h create mode 100644 external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_bf16_sm80.cu create mode 100644 
external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_kernel.h create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_bf16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_fp16_sm80.cu create mode 100644 external_libs/runtime/flash_attn/lib/kernel_traits.h create mode 100644 external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h rename {runtime/lib/backends/cuda/providers/default/flash_attn/kernels => external_libs/runtime/flash_attn/lib}/philox.cuh (97%) create mode 100644 external_libs/runtime/flash_attn/lib/softmax.h rename {runtime/lib/backends/cuda/providers/default/flash_attn/kernels => external_libs/runtime/flash_attn/lib}/static_switch.h (100%) create mode 100644 external_libs/runtime/flash_attn/lib/utils.h rename runtime/include/brt/backends/cuda/providers/default/{flash_attn => custom}/op_registration.h (94%) create mode 100644 runtime/lib/backends/cuda/providers/default/custom/custom.cc rename runtime/lib/backends/cuda/providers/default/{flash_attn/flash_attn_bwd.h => custom/custom.h} (71%) rename runtime/lib/backends/cuda/providers/default/{flash_attn => custom}/op_registration.cc (67%) delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.cc delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.cc delete mode 100644 
runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/block_info.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_kernel.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_launch_template.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_fp16_sm80.cu delete mode 100644 
runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_bf16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_fp16_sm80.cu delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_kernel.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_launch_template.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits_sm90.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/softmax.h delete mode 100644 runtime/lib/backends/cuda/providers/default/flash_attn/kernels/utils.h create mode 100644 runtime/test/test_files/flash_attn_kvcache.mlir create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_cache_seqlens.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_k.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_kcache.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_q.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_v.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_inputs_vcache.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_outputs.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_outputs_kcache.data create mode 100644 runtime/test/test_files/flash_attn_kvcache_outputs_vcache.data diff --git a/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h b/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h new file mode 100644 index 000000000..34f1b66ac --- /dev/null +++ b/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h @@ -0,0 +1,66 @@ +//===- HloToByreCustom.h ---------------------------------------*--- C++-*-===// +// +// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BYTEIR_CONVERSION_HLOTOBYRETENSOR_HLOTOBYRECUSTOM_H
+#define BYTEIR_CONVERSION_HLOTOBYRETENSOR_HLOTOBYRECUSTOM_H
+
+#include "mhlo/IR/hlo_ops.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+
+namespace mlir {
+// forward decl
+namespace func {
+class FuncOp;
+} // namespace func
+class Operation;
+
+// abstract struct for convert rule; deleted through base pointers, so the
+// destructor must be virtual
+struct ByreCustomConvertRuleBase {
+  ByreCustomConvertRuleBase() {}
+  virtual ~ByreCustomConvertRuleBase() {}
+
+  virtual llvm::StringRef getCustomLibPath(llvm::StringRef callee) {
+    return "";
+  }
+
+  virtual llvm::StringRef getApiName(llvm::StringRef callee) { return ""; }
+
+  virtual ArrayAttr getExtraArgs(mhlo::CustomCallOp op,
+                                 PatternRewriter &rewriter) {
+    return {};
+  }
+};
+
+// convert rules for cuda custom ops
+struct CudaCustomConvertRule : public ByreCustomConvertRuleBase {
+  llvm::StringRef getCustomLibPath(llvm::StringRef callee) override;
+  llvm::StringRef getApiName(llvm::StringRef callee) override;
+  ArrayAttr getExtraArgs(mhlo::CustomCallOp op,
+                         PatternRewriter &rewriter) override;
+};
+
+// use ByreCustomConvertRuleBase to decide how to convert to byre custom op
+std::unique_ptr<OperationPass<func::FuncOp>>
+createConvertHloToByreCustomPass(ByreCustomConvertRuleBase *);
+
+} // namespace mlir
+
+#endif // BYTEIR_CONVERSION_HLOTOBYRETENSOR_HLOTOBYRECUSTOM_H
diff --git a/compiler/include/byteir/Dialect/Byre/ByreOps.td b/compiler/include/byteir/Dialect/Byre/ByreOps.td
index c014dd37a..5e860a254 100644
--- a/compiler/include/byteir/Dialect/Byre/ByreOps.td
+++ b/compiler/include/byteir/Dialect/Byre/ByreOps.td
@@ -178,4 +178,41 @@ def Byre_AliasOp
   let hasVerifier = 1;
 }
 
+def Byre_CustomOp : Byre_Op<"custom",
+    [HasParent<"func::FuncOp">, ByreInterface]> {
+  let summary = "compute a custom operation identified by library path and API name";
+  let description = [{
+    Example:
+    ```mlir
+    %2 = byre.custom(%0, %1) { lib_path = "xxx.so", api_name = "add", extra_args = [0 : i64, 1 : i64, 2.0 : f32] } : (f32, f32) -> f32
+    ```
+    During execution, "xxx.so" will be loaded and its "add" function will be called with the operands and extra arguments.
+  }];
+
+  let arguments = (ins
+    StrAttr:$lib_path,
+    StrAttr:$api_name,
+    Variadic<AnyType>:$operands,
+    ArrayAttr:$extra_args
+  );
+
+  let results = (outs
+    Variadic<AnyType>:$results
+  );
+
+  let extraClassDeclaration = [{
+    FunctionType getType();
+
+    /// Get the argument operands to the called function.
+    operand_range getArgOperands() {
+      return {arg_operand_begin(), arg_operand_end()};
+    }
+
+    operand_iterator arg_operand_begin() { return operand_begin(); }
+    operand_iterator arg_operand_end() { return operand_end(); }
+  }];
+
+  let hasVerifier = 1;
+}
+
 #endif // BYTEIR_DIALECT_BYRE_BYRE_OPS
diff --git a/compiler/lib/Conversion/HloToByreTensor/CMakeLists.txt b/compiler/lib/Conversion/HloToByreTensor/CMakeLists.txt
index c78a7b49c..b14100add 100644
--- a/compiler/lib/Conversion/HloToByreTensor/CMakeLists.txt
+++ b/compiler/lib/Conversion/HloToByreTensor/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_byteir_conversion_library(ByteIRHloToByreTensor
+  HloToByreCustom.cpp
   HloToByreTensor.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp b/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp
new file mode 100644
index 000000000..6cb0f2909
--- /dev/null
+++ b/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp
@@ -0,0 +1,275 @@
+//===- HloToByreCustom.cpp ------------------------------------*- C++ -*-===//
+//
+// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "byteir/Conversion/HloToByreTensor/HloToByreCustom.h"
+#include "byteir/Dialect/Byre/ByreDialect.h"
+#include "byteir/Dialect/Byre/Common.h"
+#include "byteir/Dialect/mhlo/Util/CustomCallUtil.h"
+#include "byteir/Utils/Utils.h"
+#include "mlir/AsmParser/AsmParser.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#include "../PassDetail.h"
+
+using namespace mlir;
+using namespace llvm;
+
+class ConvertHloToByreCustomPass : public ::mlir::OperationPass<func::FuncOp> {
+public:
+  using Base = ConvertHloToByreCustomPass;
+
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertHloToByreCustomPass)
+
+  ConvertHloToByreCustomPass()
+      : ::mlir::OperationPass<func::FuncOp>(
+            ::mlir::TypeID::get<ConvertHloToByreCustomPass>()) {}
+
+  ConvertHloToByreCustomPass(const ConvertHloToByreCustomPass &other)
+      : ::mlir::OperationPass<func::FuncOp>(other) {}
+
+  explicit ConvertHloToByreCustomPass(ByreCustomConvertRuleBase *converter)
+      : ::mlir::OperationPass<func::FuncOp>(
+            ::mlir::TypeID::get<ConvertHloToByreCustomPass>()),
+        converter(converter) {}
+
+  ::llvm::StringRef getDescription() const override {
+    return "Convert hlo ops to byre custom ops.";
+  }
+
+  /// Returns the derived pass name.
+  static constexpr ::llvm::StringLiteral getPassName() {
+    return ::llvm::StringLiteral("ConvertHloToByreCustomPass");
+  }
+  ::llvm::StringRef getName() const override {
+    return "ConvertHloToByreCustomPass";
+  }
+
+  /// Support isa/dyn_cast functionality for the derived pass class.
+  static bool classof(const ::mlir::Pass *pass) {
+    return pass->getTypeID() ==
+           ::mlir::TypeID::get<ConvertHloToByreCustomPass>();
+  }
+
+  /// A clone method to create a copy of this pass.
+  std::unique_ptr<::mlir::Pass> clonePass() const override {
+    return std::make_unique<ConvertHloToByreCustomPass>(
+        *static_cast<const ConvertHloToByreCustomPass *>(this));
+  }
+
+  /// Return the dialect that must be loaded in the context before this pass.
+  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
+    registry.insert<mhlo::MhloDialect>();
+    registry.insert<byre::ByreDialect>();
+    registry.insert<func::FuncDialect>();
+  }
+
+  void runOnOperation() override;
+
+protected:
+  ByreCustomConvertRuleBase *converter = nullptr;
+};
+
+namespace {
+constexpr StringRef getFlashAttnLibPath() {
+  return "external_libs/libflash_attn.so";
+}
+constexpr StringRef getFlashAttnFwdAPI() { return "run_flash_attn_fwd"; }
+constexpr StringRef getFlashAttnBwdAPI() { return "run_flash_attn_bwd"; }
+constexpr StringRef getFlashAttnKVCacheAPI() {
+  return "run_flash_attn_kvcache";
+}
+} // namespace
+
+StringRef mlir::CudaCustomConvertRule::getCustomLibPath(StringRef callee) {
+  if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) {
+    return getFlashAttnLibPath();
+  }
+  return "";
+}
+
+StringRef mlir::CudaCustomConvertRule::getApiName(StringRef callee) {
+  if (callee == getFlashAttnFwdName()) {
+    return getFlashAttnFwdAPI();
+  } else if (callee == getFlashAttnBwdName()) {
+    return getFlashAttnBwdAPI();
+  }
+  return "";
+}
+
+ArrayAttr mlir::CudaCustomConvertRule::getExtraArgs(mhlo::CustomCallOp op,
+                                                    PatternRewriter &rewriter) {
+  SmallVector<Attribute> extraArgs;
+  auto callee = op.getCallTargetName();
+  if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) {
+    ShapedType qShapeTy;
+    ShapedType kShapeTy;
+    ShapedType vShapeTy;
+    ShapedType oShapeTy;
+    if (callee == getFlashAttnFwdName()) {
+      qShapeTy = op.getOperand(0).getType().dyn_cast<ShapedType>();
+      kShapeTy = op.getOperand(1).getType().dyn_cast<ShapedType>();
+      vShapeTy = op.getOperand(2).getType().dyn_cast<ShapedType>();
+      oShapeTy = op.getResult(0).getType().dyn_cast<ShapedType>();
+    } else {
+      qShapeTy = op.getOperand(1).getType().dyn_cast<ShapedType>();
+      kShapeTy = op.getOperand(2).getType().dyn_cast<ShapedType>();
+      vShapeTy = op.getOperand(3).getType().dyn_cast<ShapedType>();
+      oShapeTy = op.getOperand(4).getType().dyn_cast<ShapedType>();
+    }
+    if (!qShapeTy || !qShapeTy.hasStaticShape() || !kShapeTy ||
+        !kShapeTy.hasStaticShape() || !vShapeTy || !vShapeTy.hasStaticShape() ||
+        !oShapeTy || !oShapeTy.hasStaticShape())
+      assert(false && "unexpected flash attention shape!");
+
+    auto qShape = qShapeTy.getShape();
+    auto kShape = kShapeTy.getShape();
+    auto vShape = vShapeTy.getShape();
+    auto oShape = oShapeTy.getShape();
+    int64_t batchSizeQ = qShape[0];
+    int64_t seqlenQ = qShape[1];
+    int64_t numHeadsQ = qShape[2];
+    int64_t headSizeQ = qShape[3];
+    int64_t batchSizeK = kShape[0];
+    int64_t seqlenK = kShape[1];
+    int64_t numHeadsK = kShape[2];
+    int64_t headSizeK = kShape[3];
+    assert(headSizeQ == headSizeK && batchSizeQ == batchSizeK);
+    assert(headSizeQ % 8 == 0);
+
+    auto roundMultiple = [](int x, int m) { return (x + m - 1) / m * m; };
+    const int headSize = roundMultiple(headSizeQ, 8);
+    const int headSizeRounded = roundMultiple(headSize, 32);
+    const int seqlenQRounded = roundMultiple(seqlenQ, 128);
+    const int seqlenKRounded = roundMultiple(seqlenK, 128);
+
+    uint32_t qBatchStride = qShape[1] * qShape[2] * qShape[3];
+    uint32_t kBatchStride = kShape[1] * kShape[2] * kShape[3];
+    uint32_t vBatchStride = vShape[1] * vShape[2] * vShape[3];
+    uint32_t oBatchStride = oShape[1] * oShape[2] * oShape[3];
+    uint32_t qRowStride = qShape[2] * qShape[3];
+    uint32_t kRowStride = kShape[2] * kShape[3];
+    uint32_t vRowStride = vShape[2] * vShape[3];
+    uint32_t oRowStride = oShape[2] * oShape[3];
+    uint32_t qHeadStride = qShape[3];
+    uint32_t kHeadStride = kShape[3];
+    uint32_t vHeadStride = vShape[3];
+    uint32_t oHeadStride = oShape[3];
+
+    // use dyn_cast_or_null: the attribute may be absent, and a plain cast on
+    // a null attribute would assert before the check below
+    DictionaryAttr byteirAttrs =
+        op->getAttr(getCustomCallAttrName()).dyn_cast_or_null<DictionaryAttr>();
+    if (!byteirAttrs)
+      assert(false && "byteir attribute not found!");
+    bool causal = byteirAttrs.get("causal").cast<BoolAttr>().getValue();
+    float softmaxScale = byteirAttrs.get("softmax_scale")
+                             .cast<FloatAttr>()
+                             .getValue()
+                             .convertToDouble();
+    float dropoutP = byteirAttrs.get("dropout_p")
+                         .cast<FloatAttr>()
+                         .getValue()
+                         .convertToDouble();
+    int windowSizeLeft = -1;
+    int windowSizeRight = -1;
+    // causal=true is the same as causal=false in this case
+    if (seqlenQ == 1)
+      causal = false;
+    if (causal)
+      windowSizeRight = 0;
+
+    // extra args should match kernel api call
+    extraArgs.push_back(rewriter.getI64IntegerAttr(qBatchStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(kBatchStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(vBatchStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(oBatchStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(qRowStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(kRowStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(vRowStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(oRowStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(qHeadStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(kHeadStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(vHeadStride));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(oHeadStride));
+
+    extraArgs.push_back(rewriter.getI64IntegerAttr(batchSizeQ));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsQ));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsK));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(headSize));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(headSizeRounded));
+    extraArgs.push_back(rewriter.getF32FloatAttr(softmaxScale));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQ));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenK));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQRounded));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenKRounded));
+    extraArgs.push_back(rewriter.getF32FloatAttr(dropoutP));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeLeft));
+    extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeRight));
+    return ArrayAttr::get(rewriter.getContext(), extraArgs);
+  }
+  return {};
+}
+
+struct ConvertCustomCallOpToByreCustom : public RewritePattern {
+  ConvertCustomCallOpToByreCustom(MLIRContext *context,
+                                  ByreCustomConvertRuleBase *converter)
+      : RewritePattern(MatchAnyOpTypeTag(), 1, context), converter(converter) {}
+  LogicalResult matchAndRewrite(Operation *op,
+                                PatternRewriter &rewriter) const override {
+    if (!isa<mhlo::CustomCallOp>(op))
+      return failure();
+    auto customCallOp = cast<mhlo::CustomCallOp>(op);
+    auto callee = customCallOp.getCallTargetName();
+    auto libPath = converter->getCustomLibPath(callee);
+    if (libPath == "")
+      return failure();
+    auto apiName = converter->getApiName(callee);
+    auto extraArgs = converter->getExtraArgs(customCallOp, rewriter);
+
+    auto newOp = rewriter.create<byre::CustomOp>(
+        customCallOp.getLoc(), customCallOp.getResultTypes(), libPath, apiName,
+        customCallOp.getOperands(), extraArgs);
+    rewriter.replaceOp(op, newOp.getResults());
+    return success();
+  }
+
+private:
+  ByreCustomConvertRuleBase *converter;
+};
+
+void ConvertHloToByreCustomPass::runOnOperation() {
+  // early return if no converter
+  if (nullptr == converter) {
+    return;
+  }
+
+  MLIRContext &ctx = getContext();
+  RewritePatternSet patterns(&ctx);
+  auto funcOp = getOperation();
+
+  patterns.add<ConvertCustomCallOpToByreCustom>(patterns.getContext(),
+                                                converter);
+  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
+  if (failed(applyPatternsAndFoldGreedily(funcOp, frozenPatterns))) {
+    signalPassFailure();
+  }
+}
+
+std::unique_ptr<OperationPass<func::FuncOp>>
+mlir::createConvertHloToByreCustomPass(ByreCustomConvertRuleBase *converter) {
+  return std::make_unique<ConvertHloToByreCustomPass>(converter);
+}
diff --git a/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp b/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp
index aa6d81bd8..99ba25872 100644
--- a/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp
+++ b/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp
@@ -485,6 +485,14 @@ std::string AliasOp::getCalleeName() { return "AliasOp"; }
 
 Value AliasOp::getViewSource() { return getSource(); }
 
+//===----------------------------------------------------------------------===//
+// CustomOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult CustomOp::verify() {
+  return verifyOpInEntryPointFunc(this->getOperation());
+}
+
 // LWC: ignore Async for now
 //
 //===----------------------------------------------------------------------===//
diff --git a/compiler/lib/Pipelines/ByreTensorOpt.cpp b/compiler/lib/Pipelines/ByreTensorOpt.cpp
index 1510cb217..25e720544 100644
--- a/compiler/lib/Pipelines/ByreTensorOpt.cpp
+++ b/compiler/lib/Pipelines/ByreTensorOpt.cpp
@@ -18,6 +18,7 @@
 
 #include "byteir/Pipelines/ByreTensorOpt.h"
 #include "byteir/Conversion/FuncToByre/FuncToByre.h"
+#include "byteir/Conversion/HloToByreTensor/HloToByreCustom.h"
 #include "byteir/Conversion/HloToByreTensor/HloToByreTensor.h"
 #include "byteir/Dialect/Byre/ByreDialect.h"
 #include "byteir/Dialect/Byre/Passes.h"
@@ -44,6 +45,9 @@ void createByreTensorOptPipelineImpl(OpPassManager &pm, std::string entryFunc,
   pm.addPass(createCanonicalizerPass())
   pm.addNestedPass<func::FuncOp>(
       createConvertHloToByreTensorPass(appendArgTypes));
+  // the rule object must outlive the lazily-run pipeline, so keep it static
+  // instead of deleting it before the pass ever executes
+  static CudaCustomConvertRule gpuRule;
+  pm.addNestedPass<func::FuncOp>(
+      createConvertHloToByreCustomPass(&gpuRule));
   pm.addPass(createCanonicalizerPass());
 }
 } // namespace
diff --git a/external_libs/external/cutlass b/external_libs/external/cutlass
new file mode 160000
index 000000000..a75b4ac48
--- /dev/null
+++ b/external_libs/external/cutlass
@@ -0,0 +1 @@
+Subproject commit a75b4ac483166189a45290783cb0a18af5ff0ea5
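The byre.custom contract introduced above -- load `lib_path`, look up `api_name`, and invoke it with the operands plus the packed `extra_args` -- implies a small dynamic-loading shim on the runtime side (the new `custom` provider in the diffstat). The sketch below illustrates that idea with POSIX `dlopen`/`dlsym`; the `CustomApiFn` signature is a made-up placeholder for illustration, not the actual brt runtime calling convention.

    // Hypothetical resolution of a byre.custom callee; CustomApiFn is an
    // illustrative assumption, not the real brt ABI.
    #include <dlfcn.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    using CustomApiFn = void (*)(void **tensors, const std::int64_t *extraArgs,
                                 std::size_t numExtraArgs, void *stream);

    CustomApiFn loadCustomApi(const char *libPath, const char *apiName) {
      void *handle = dlopen(libPath, RTLD_NOW | RTLD_LOCAL); // load lib_path
      if (!handle) {
        std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return nullptr;
      }
      // resolve api_name, e.g. "run_flash_attn_fwd"
      return reinterpret_cast<CustomApiFn>(dlsym(handle, apiName));
    }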
diff --git a/external_libs/external/half/LICENSE.txt b/external_libs/external/half/LICENSE.txt
new file mode 100644
index 000000000..45f55db55
--- /dev/null
+++ b/external_libs/external/half/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2012-2021 Christian Rau
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/external_libs/external/half/README.txt b/external_libs/external/half/README.txt
new file mode 100644
index 000000000..3dd0d1c2d
--- /dev/null
+++ b/external_libs/external/half/README.txt
@@ -0,0 +1,317 @@
+HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.2.0)
+-----------------------------------------------------
+
+This is a C++ header-only library to provide an IEEE 754 conformant 16-bit
+half-precision floating-point type along with corresponding arithmetic
+operators, type conversions and common mathematical functions. It aims for both
+efficiency and ease of use, trying to accurately mimic the behaviour of the
+built-in floating-point types at the best performance possible.
+
+
+INSTALLATION AND REQUIREMENTS
+-----------------------------
+
+Conveniently, the library consists of just a single header file containing all
+the functionality, which can be directly included by your projects, without the
+necessity to build anything or link to anything.
+
+Whereas this library is fully C++98-compatible, it can profit from certain
+C++11 features. Support for those features is checked automatically at compile
+(or rather preprocessing) time, but can be explicitly enabled or disabled by
+predefining the corresponding preprocessor symbols to either 1 or 0 yourself
+before including half.hpp. This is useful when the automatic detection fails
+(for more exotic implementations) or when a feature should be explicitly
+disabled (see the short example after this list):
+
+  - 'long long' integer type for mathematical functions returning 'long long'
+    results (enabled for VC++ 2003 and icc 11.1 and newer, gcc and clang,
+    overridable with 'HALF_ENABLE_CPP11_LONG_LONG').
+
+  - Static assertions for extended compile-time checks (enabled for VC++ 2010,
+    gcc 4.3, clang 2.9, icc 11.1 and newer, overridable with
+    'HALF_ENABLE_CPP11_STATIC_ASSERT').
+
+  - Generalized constant expressions (enabled for VC++ 2015, gcc 4.6, clang 3.1,
+    icc 14.0 and newer, overridable with 'HALF_ENABLE_CPP11_CONSTEXPR').
+
+  - noexcept exception specifications (enabled for VC++ 2015, gcc 4.6,
+    clang 3.0, icc 14.0 and newer, overridable with 'HALF_ENABLE_CPP11_NOEXCEPT').
+
+  - User-defined literals for half-precision literals to work (enabled for
+    VC++ 2015, gcc 4.7, clang 3.1, icc 15.0 and newer, overridable with
+    'HALF_ENABLE_CPP11_USER_LITERALS').
+
+  - Thread-local storage for per-thread floating-point exception flags (enabled
+    for VC++ 2015, gcc 4.8, clang 3.3, icc 15.0 and newer, overridable with
+    'HALF_ENABLE_CPP11_THREAD_LOCAL').
+
+  - Type traits and template meta-programming features from <type_traits>
+    (enabled for VC++ 2010, libstdc++ 4.3, libc++ and newer, overridable with
+    'HALF_ENABLE_CPP11_TYPE_TRAITS').
+
+  - Special integer types from <cstdint> (enabled for VC++ 2010, libstdc++ 4.3,
+    libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CSTDINT').
+
+  - Certain C++11 single-precision mathematical functions from <cmath> for
+    floating-point classification during conversions from higher precision types
+    (enabled for VC++ 2013, libstdc++ 4.3, libc++ and newer, overridable with
+    'HALF_ENABLE_CPP11_CMATH').
+
+  - Floating-point environment control from <cfenv> for possible exception
+    propagation to the built-in floating-point platform (enabled for VC++ 2013,
+    libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CFENV').
+
+  - Hash functor 'std::hash' from <functional> (enabled for VC++ 2010,
+    libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_HASH').
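A minimal sketch of such an override, pinning two feature tests before the first inclusion (the include path here is an assumption of this example):

    // Force-disable constexpr detection and force-enable user-defined
    // literals before half.hpp is first included.
    #define HALF_ENABLE_CPP11_CONSTEXPR 0
    #define HALF_ENABLE_CPP11_USER_LITERALS 1
    #include "half/half.hpp" // adjust to your include layout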
+
+The library has been tested successfully with Visual C++ 2005-2015, gcc 4-8
+and clang 3-8 on 32- and 64-bit x86 systems. Please contact me if you have any
+problems, suggestions or even just success testing it on other platforms.
+
+
+DOCUMENTATION
+-------------
+
+What follows are some general words about the usage of the library and its
+implementation. For a complete documentation of its interface consult the
+corresponding website http://half.sourceforge.net. You may also generate the
+complete developer documentation from the library's only include file's doxygen
+comments, but this is more relevant to developers rather than mere users.
+
+BASIC USAGE
+
+To make use of the library just include its only header file half.hpp, which
+defines all half-precision functionality inside the 'half_float' namespace. The
+actual 16-bit half-precision data type is represented by the 'half' type, which
+uses the standard IEEE representation with 1 sign bit, 5 exponent bits and 11
+mantissa bits (including the hidden bit) and supports all types of special
+values, like subnormal values, infinity and NaNs. This type behaves like the
+built-in floating-point types as much as possible, supporting the usual
+arithmetic, comparison and streaming operators, which makes its use pretty
+straight-forward:
+
+    using half_float::half;
+    half a(3.4), b(5);
+    half c = a * b;
+    c += 3;
+    if(c > a)
+        std::cout << c << std::endl;
+
+Additionally the 'half_float' namespace also defines half-precision versions
+for all mathematical functions of the C++ standard library, which can be used
+directly through ADL:
+
+    half a(-3.14159);
+    half s = sin(abs(a));
+    long l = lround(s);
+
+You may also specify explicit half-precision literals, since the library
+provides a user-defined literal inside the 'half_float::literal' namespace,
+which you just need to import (assuming support for C++11 user-defined literals):
+
+    using namespace half_float::literal;
+    half x = 1.0_h;
+
+Furthermore the library provides proper specializations for
+'std::numeric_limits', defining various implementation properties, and
+'std::hash' for hashing half-precision numbers (assuming support for C++11
+'std::hash'). Similar to the corresponding preprocessor symbols from <cmath>
+the library also defines the 'HUGE_VALH' constant and maybe the 'FP_FAST_FMAH'
+symbol.
+
+CONVERSIONS AND ROUNDING
+
+The half is explicitly constructible/convertible from a single-precision float
+argument. Thus it is also explicitly constructible/convertible from any type
+implicitly convertible to float, but constructing it from types like double or
+int will involve the usual warnings arising when implicitly converting those to
+float because of the lost precision. On the one hand those warnings are
+intentional, because converting those types to half necessarily also reduces
+precision. But on the other hand they are raised for explicit conversions from
+those types, when the user knows what he is doing. So if those warnings keep
+bugging you, then you won't get around first explicitly converting to float
+before converting to half, or use the 'half_cast' described below. In addition
+you can also directly assign float values to halfs.
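A small sketch of the three options just described; only 'half' and 'half_cast' from the library itself are assumed:

    #include "half/half.hpp" // include path is an assumption
    using half_float::half;

    double d = 4.2;
    half a(d);                               // may warn: double -> float -> half
    half b(static_cast<float>(d));           // explicit pre-conversion, no warning
    half c = half_float::half_cast<half>(d); // direct conversion, no float step
    half e;
    e = 4.2f;                                // direct assignment from a float value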
+
+In contrast to the float-to-half conversion, which reduces precision, the
+conversion from half to float (and thus to any other type implicitly
+convertible from float) is implicit, because all values representable with
+half-precision are also representable with single-precision. This way the
+half-to-float conversion behaves similar to the builtin float-to-double
+conversion and all arithmetic expressions involving both half-precision and
+single-precision arguments will be of single-precision type. This way you can
+also directly use the mathematical functions of the C++ standard library,
+though in this case you will invoke the single-precision versions which will
+also return single-precision values, which is (even if maybe performing the
+exact same computation, see below) not as conceptually clean when working in a
+half-precision environment.
+
+The default rounding mode for conversions between half and more precise types
+as well as for rounding results of arithmetic operations and mathematical
+functions rounds to the nearest representable value. But by predefining the
+'HALF_ROUND_STYLE' preprocessor symbol this default can be overridden with one
+of the other standard rounding modes using their respective constants or the
+equivalent values of 'std::float_round_style' (it can even be synchronized with
+the built-in single-precision implementation by defining it to
+'std::numeric_limits<float>::round_style'):
+
+  - 'std::round_indeterminate' (-1) for the fastest rounding.
+
+  - 'std::round_toward_zero' (0) for rounding toward zero.
+
+  - 'std::round_to_nearest' (1) for rounding to the nearest value (default).
+
+  - 'std::round_toward_infinity' (2) for rounding toward positive infinity.
+
+  - 'std::round_toward_neg_infinity' (3) for rounding toward negative infinity.
+
+In addition to changing the overall default rounding mode one can also use the
+'half_cast'. This converts between half and any built-in arithmetic type using
+a configurable rounding mode (or the default rounding mode if none is
+specified). In addition to a configurable rounding mode, 'half_cast' has
+another big difference to a mere 'static_cast': Any conversions are performed
+directly using the given rounding mode, without any intermediate conversion
+to/from 'float'. This is especially relevant for conversions to integer types,
+which don't necessarily truncate anymore. But also for conversions from
+'double' or 'long double' this may produce more precise results than a
+pre-conversion to 'float' using the single-precision implementation's current
+rounding mode would.
+
+    half a = half_cast<half>(4.2);
+    half b = half_cast<half, std::numeric_limits<float>::round_style>(4.2f);
+    assert( half_cast<int>( 0.7_h ) == 1 );
+    assert( half_cast<half, std::round_toward_zero>( 4097 ) == 4096.0_h );
+    assert( half_cast<half, std::round_toward_infinity>( 4097 ) == 4100.0_h );
+    assert( half_cast<double, std::round_toward_infinity>( std::numeric_limits<half>::min() ) > 0.0_h );
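Predefining the library-wide default named above is a one-liner; a sketch using value 0 from the list (the include path is again an assumption):

    // Make round-toward-zero the global default before inclusion.
    #define HALF_ROUND_STYLE 0 // = std::round_toward_zero
    #include "half/half.hpp"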
+
+ACCURACY AND PERFORMANCE
+
+From version 2.0 onward the library is implemented without employing the
+underlying floating-point implementation of the system (except for conversions,
+of course), providing an entirely self-contained half-precision implementation
+with results independent from the system's existing single- or double-precision
+implementation and its rounding behaviour.
+
+As to accuracy, many of the operators and functions provided by this library
+are exact to rounding for all rounding modes, i.e. the error to the exact
+result is at most 0.5 ULP (unit in the last place) for rounding to nearest and
+less than 1 ULP for all other rounding modes. This holds for all the operations
+required by the IEEE 754 standard and many more. Specifically the following
+functions might exhibit a deviation from the correctly rounded exact result by
+1 ULP for a select few input values: 'expm1', 'log1p', 'pow', 'atan2', 'erf',
+'erfc', 'lgamma', 'tgamma' (for more details see the documentation of the
+individual functions). All other functions and operators are always exact to
+rounding or independent of the rounding mode altogether.
+
+The increased IEEE-conformance and cleanliness of this implementation comes
+with a certain performance cost compared to doing computations and mathematical
+functions in hardware-accelerated single-precision. On average and depending on
+the platform, the arithmetic operators are about 75% as fast and the
+mathematical functions about 33-50% as fast as performing the corresponding
+operations in single-precision and converting between the inputs and outputs.
+However, directly computing with half-precision values is a rather rare
+use-case and usually using actual 'float' values for all computations and
+temporaries and using 'half's only for storage is the recommended way. But
+nevertheless the goal of this library was to provide a complete and
+conceptually clean IEEE-conformant half-precision implementation and in the few
+cases when you do need to compute directly in half-precision you do so for a
+reason and want accurate results.
+
+If necessary, this internal implementation can be overridden by predefining the
+'HALF_ARITHMETIC_TYPE' preprocessor symbol to one of the built-in
+floating-point types ('float', 'double' or 'long double'), which will cause the
+library to use this type for computing arithmetic operations and mathematical
+functions (if available). However, due to using the platform's floating-point
+implementation (and its rounding behaviour) internally, this might cause
+results to deviate from the specified half-precision rounding mode. It will of
+course also inhibit the automatic exception detection described below.
+
+The conversion operations between half-precision and single-precision types can
+also make use of the F16C extension for x86 processors by using the
+corresponding compiler intrinsics from <immintrin.h>. Support for this is
+checked at compile-time by looking for the '__F16C__' macro which at least gcc
+and clang define based on the target platform. It can also be enabled manually
+by predefining the 'HALF_ENABLE_F16C_INTRINSICS' preprocessor symbol to 1, or 0
+for explicitly disabling it. However, this will directly use the corresponding
+intrinsics for conversion without checking if they are available at runtime
+(possibly crashing if they are not), so make sure they are supported on the
+target platform before enabling this.
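Both overrides described in this section are likewise plain predefinitions, e.g. (a sketch; enabling F16C here assumes the target really supports it, as cautioned above):

    #define HALF_ARITHMETIC_TYPE float    // compute internally in single precision
    #define HALF_ENABLE_F16C_INTRINSICS 1 // use F16C conversions unconditionally
    #include "half/half.hpp"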
+
+EXCEPTION HANDLING
+
+The half-precision implementation supports all 5 required floating-point
+exceptions from the IEEE standard to indicate erroneous inputs or inexact
+results during operations. These are represented by exception flags which
+actually use the same values as the corresponding 'FE_...' flags defined in
+C++11's <cfenv> header if supported, specifically:
+
+  - 'FE_INVALID' for invalid inputs to an operation.
+  - 'FE_DIVBYZERO' for finite inputs producing infinite results.
+  - 'FE_OVERFLOW' if a result is too large to represent finitely.
+  - 'FE_UNDERFLOW' for a subnormal or zero result after rounding.
+  - 'FE_INEXACT' if a result needed rounding to be representable.
+  - 'FE_ALL_EXCEPT' as a convenient OR of all possible exception flags.
+
+The internal exception flag state will start with all flags cleared and is
+maintained per thread if C++11 thread-local storage is supported, otherwise it
+will be maintained globally and will theoretically NOT be thread-safe (while
+practically being as thread-safe as a simple integer variable can be). These
+flags can be managed explicitly using the library's error handling functions,
+which again try to mimic the built-in functions for handling floating-point
+exceptions from <cfenv>. You can clear them with 'feclearexcept' (which is the
+only way a flag can be cleared), test them with 'fetestexcept', explicitly
+raise errors with 'feraiseexcept' and save and restore their state using
+'fegetexceptflag' and 'fesetexceptflag'. You can also throw corresponding C++
+exceptions based on the current flag state using 'fethrowexcept'.
+
+However, any automatic exception detection and handling during half-precision
+operations and functions is DISABLED by default, since it comes with a minor
+performance overhead due to runtime checks, and reacting to IEEE floating-point
+exceptions is rarely ever needed in application code. But the library fully
+supports IEEE-conformant detection of floating-point exceptions and various
+ways for handling them, which can be enabled by pre-defining the corresponding
+preprocessor symbols to 1. They can be enabled individually or all at once and
+they will be processed in the order they are listed here:
+
+  - 'HALF_ERRHANDLING_FLAGS' sets the internal exception flags described above
+    whenever the corresponding exception occurs.
+  - 'HALF_ERRHANDLING_ERRNO' sets the value of 'errno' from <cerrno> similar to
+    the behaviour of the built-in floating-point types when 'MATH_ERRNO' is used.
+  - 'HALF_ERRHANDLING_FENV' will propagate exceptions to the built-in
+    floating-point implementation using 'std::feraiseexcept' if support for
+    C++11 floating-point control is enabled. However, this does not synchronize
+    exceptions: neither will clearing propagate nor will it work in reverse.
+  - 'HALF_ERRHANDLING_THROW_...' can be defined to a string literal which will
+    be used as description message for a C++ exception that is thrown whenever
+    a 'FE_...' exception occurs, similar to the behaviour of 'fethrowexcept'.
+
+If any of the above error handling is activated, non-quiet operations on
+half-precision values will also raise a 'FE_INVALID' exception whenever
+they encounter a signaling NaN value, in addition to transforming the value
+into a quiet NaN. If error handling is disabled, signaling NaNs will be
+treated like quiet NaNs (while still getting explicitly quieted if propagated
+to the result). There can also be additional treatment of overflow and
+underflow errors after they have been processed as above, which is ENABLED by
+default (but of course only takes effect if any other exception handling is
+activated) unless overridden by pre-defining the corresponding preprocessor
+symbol to 0:
+
+  - 'HALF_ERRHANDLING_OVERFLOW_TO_INEXACT' will cause overflow errors to also
+    raise a 'FE_INEXACT' exception.
+  - 'HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT' will cause underflow errors to also
+    raise a 'FE_INEXACT' exception. This will also slightly change the
+    behaviour of the underflow exception, which will ONLY be raised if the
+    result is actually inexact due to underflow. If this is disabled, underflow
+    exceptions will be raised for ANY (possibly exact) subnormal result.
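Putting the flag machinery above together, a minimal sketch (assuming 'HALF_ERRHANDLING_FLAGS' is set before the single inclusion of the header, and that 'feclearexcept'/'fetestexcept' mirror their <cfenv> namesakes as the text describes):

    #define HALF_ERRHANDLING_FLAGS 1 // enable internal exception flags
    #include "half/half.hpp"         // include path is an assumption
    #include <cfenv>                 // for the FE_... macro values
    #include <iostream>

    int main() {
      using half_float::half;
      half_float::feclearexcept(FE_ALL_EXCEPT); // start from a clean state
      half h = half(65504.0f) * half(2.0f);     // overflows the half range (max is 65504)
      if (half_float::fetestexcept(FE_OVERFLOW))
        std::cout << "FE_OVERFLOW raised, result: " << h << '\n';
      return 0;
    }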
+
+
+CREDITS AND CONTACT
+-------------------
+
+This library is developed by CHRISTIAN RAU and released under the MIT License
+(see LICENSE.txt). If you have any questions or problems with it, feel free to
+contact me at rauy@users.sourceforge.net.
+
+Additional credit goes to JEROEN VAN DER ZIJP for his paper on "Fast Half Float
+Conversions", whose algorithms have been used in the library for converting
+between half-precision and single-precision values.
diff --git a/external_libs/external/half/include/half/half.hpp b/external_libs/external/half/include/half/half.hpp
new file mode 100644
index 000000000..f4d861463
--- /dev/null
+++ b/external_libs/external/half/include/half/half.hpp
@@ -0,0 +1,4601 @@
+// half - IEEE 754-based half-precision floating-point library.
+//
+// Copyright (c) 2012-2021 Christian Rau <rauy@users.sourceforge.net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+// Version 2.2.0
+
+/// \file
+/// Main header file for half-precision functionality.
+ +#ifndef HALF_HALF_HPP +#define HALF_HALF_HPP + +#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) + +#if defined(__INTEL_COMPILER) + #define HALF_ICC_VERSION __INTEL_COMPILER +#elif defined(__ICC) + #define HALF_ICC_VERSION __ICC +#elif defined(__ICL) + #define HALF_ICC_VERSION __ICL +#else + #define HALF_ICC_VERSION 0 +#endif + +// check C++11 language features +#if defined(__clang__) // clang + #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__) // Intel C++ + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif defined(__GNUC__) // gcc + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L + #if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 +#elif defined(_MSC_VER) // Visual C++ + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if _MSC_VER >= 1310 && 
!defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 + #define HALF_POP_WARNINGS 1 + #pragma warning(push) + #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features +#include +#if defined(_LIBCPP_VERSION) // libc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #ifndef HALF_ENABLE_CPP11_CSTDINT + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #ifndef HALF_ENABLE_CPP11_CMATH + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #ifndef HALF_ENABLE_CPP11_HASH + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #ifndef HALF_ENABLE_CPP11_CFENV + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif +#elif defined(__GLIBCXX__) // libstdc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifdef __clang__ + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #else + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif + #endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif +#endif +#undef HALF_GCC_VERSION +#undef HALF_ICC_VERSION + +// any error throwing C++ exceptions? +#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT) +#define HALF_ERRHANDLING_THROWS 1 +#endif + +// any error handling enabled? 
+#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)
+
+#if HALF_ERRHANDLING
+ #define HALF_UNUSED_NOERR(name) name
+#else
+ #define HALF_UNUSED_NOERR(name)
+#endif
+
+// support constexpr
+#if HALF_ENABLE_CPP11_CONSTEXPR
+ #define HALF_CONSTEXPR constexpr
+ #define HALF_CONSTEXPR_CONST constexpr
+ #if HALF_ERRHANDLING
+ #define HALF_CONSTEXPR_NOERR
+ #else
+ #define HALF_CONSTEXPR_NOERR constexpr
+ #endif
+#else
+ #define HALF_CONSTEXPR
+ #define HALF_CONSTEXPR_CONST const
+ #define HALF_CONSTEXPR_NOERR
+#endif
+
+// support noexcept
+#if HALF_ENABLE_CPP11_NOEXCEPT
+ #define HALF_NOEXCEPT noexcept
+ #define HALF_NOTHROW noexcept
+#else
+ #define HALF_NOEXCEPT
+ #define HALF_NOTHROW throw()
+#endif
+
+// support thread storage
+#if HALF_ENABLE_CPP11_THREAD_LOCAL
+ #define HALF_THREAD_LOCAL thread_local
+#else
+ #define HALF_THREAD_LOCAL static
+#endif
+
+#include <utility>
+#include <algorithm>
+#include <istream>
+#include <ostream>
+#include <limits>
+#include <stdexcept>
+#include <climits>
+#include <cmath>
+#include <cstring>
+#include <cstdlib>
+#if HALF_ENABLE_CPP11_TYPE_TRAITS
+ #include <type_traits>
+#endif
+#if HALF_ENABLE_CPP11_CSTDINT
+ #include <cstdint>
+#endif
+#if HALF_ERRHANDLING_ERRNO
+ #include <cerrno>
+#endif
+#if HALF_ENABLE_CPP11_CFENV
+ #include <cfenv>
+#endif
+#if HALF_ENABLE_CPP11_HASH
+ #include <functional>
+#endif
+
+
+#ifndef HALF_ENABLE_F16C_INTRINSICS
+ /// Enable F16C instruction set intrinsics.
+ /// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between
+ /// half-precision and single-precision values which may result in improved performance. This will not perform additional checks
+ /// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
+ ///
+ /// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms.
+ #define HALF_ENABLE_F16C_INTRINSICS __F16C__
+#endif
+#if HALF_ENABLE_F16C_INTRINSICS
+ #include <immintrin.h>
+#endif
+
+#ifdef HALF_DOXYGEN_ONLY
+/// Type for internal floating-point computations.
+/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal
+/// half-precision implementation to use this type for computing arithmetic operations and mathematical functions (if available).
+/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to
+/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions.
+#define HALF_ARITHMETIC_TYPE (undefined)
+
+/// Enable internal exception flags.
+/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to
+/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept().
+#define HALF_ERRHANDLING_FLAGS 0
+
+/// Enable exception propagation to `errno`.
+/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to
+/// [errno](https://en.cppreference.com/w/cpp/error/errno) from `<cerrno>`. Specifically this will propagate domain errors as
+/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as
+/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated.
+#define HALF_ERRHANDLING_ERRNO 0
+
+/// Enable exception propagation to built-in floating-point platform.
+/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in
+/// single- and double-precision implementation's exception flags using the
+/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from `<cfenv>`. However, this
+/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision
+/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags.
+#define HALF_ERRHANDLING_FENV 0
+
+/// Throw C++ exception on domain errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a
+/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors.
+#define HALF_ERRHANDLING_THROW_INVALID (undefined)
+
+/// Throw C++ exception on pole errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a
+/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors.
+#define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined)
+
+/// Throw C++ exception on overflow errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a
+/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows.
+#define HALF_ERRHANDLING_THROW_OVERFLOW (undefined)
+
+/// Throw C++ exception on underflow errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a
+/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows.
+#define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined)
+
+/// Throw C++ exception on rounding errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a
+/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors.
+#define HALF_ERRHANDLING_THROW_INEXACT (undefined)
+#endif
+
+#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
+/// Raise INEXACT exception on overflow.
+/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the overflow exception.
+#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1
+#endif
+
+#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+/// Raise INEXACT exception on underflow.
+/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the underflow exception.
+///
+/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result
+/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result.
+#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1
+#endif
+
+/// Default rounding mode.
+/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types
+/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical
+/// functions.
It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective +/// constants or the equivalent values of +/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style): +/// +/// `std::float_round_style` | value | rounding +/// ---------------------------------|-------|------------------------- +/// `std::round_indeterminate` | -1 | fastest +/// `std::round_toward_zero` | 0 | toward zero +/// `std::round_to_nearest` | 1 | to nearest (default) +/// `std::round_toward_infinity` | 2 | toward positive infinity +/// `std::round_toward_neg_infinity` | 3 | toward negative infinity +/// +/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even +/// be set to [std::numeric_limits::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize +/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though). +#ifndef HALF_ROUND_STYLE + #define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#endif + +/// Value signaling overflow. +/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an +/// operation, in particular it just evaluates to positive infinity. +/// +/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL) +#define HUGE_VALH std::numeric_limits::infinity() + +/// Fast half-precision fma function. +/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate +/// half-precision multiplication followed by an addition, which is always the case. +/// +/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma) +#define FP_FAST_FMAH 1 + +/// Half rounding mode. +/// In correspondence with `FLT_ROUNDS` from `` this symbol expands to the rounding mode used for +/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE). +/// +/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS) +#define HLF_ROUNDS HALF_ROUND_STYLE + +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif +#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MAX +#endif +#ifndef FP_SUBNORMAL + #define FP_SUBNORMAL 0 +#endif +#ifndef FP_ZERO + #define FP_ZERO 1 +#endif +#ifndef FP_NAN + #define FP_NAN 2 +#endif +#ifndef FP_INFINITE + #define FP_INFINITE 3 +#endif +#ifndef FP_NORMAL + #define FP_NORMAL 4 +#endif + +#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT) + #define FE_INVALID 0x10 + #define FE_DIVBYZERO 0x08 + #define FE_OVERFLOW 0x04 + #define FE_UNDERFLOW 0x02 + #define FE_INEXACT 0x01 + #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) +#endif + + +/// Main namespace for half-precision functionality. +/// This namespace contains all the functionality provided by the library. +namespace half_float +{ + class half; + +#if HALF_ENABLE_CPP11_USER_LITERALS + /// Library-defined half-precision literals. + /// Import this namespace to enable half-precision floating-point literals: + /// ~~~~{.cpp} + /// using namespace half_float::literal; + /// half_float::half = 4.2_h; + /// ~~~~ + namespace literal + { + half operator "" _h(long double); + } +#endif + + /// \internal + /// \brief Implementation details. 
+ namespace detail + { + #if HALF_ENABLE_CPP11_TYPE_TRAITS + /// Conditional type. + template struct conditional : std::conditional {}; + + /// Helper for tag dispatching. + template struct bool_type : std::integral_constant {}; + using std::true_type; + using std::false_type; + + /// Type traits for floating-point types. + template struct is_float : std::is_floating_point {}; + #else + /// Conditional type. + template struct conditional { typedef T type; }; + template struct conditional { typedef F type; }; + + /// Helper for tag dispatching. + template struct bool_type {}; + typedef bool_type true_type; + typedef bool_type false_type; + + /// Type traits for floating-point types. + template struct is_float : false_type {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + #endif + + /// Type traits for floating-point bits. + template struct bits { typedef unsigned char type; }; + template struct bits : bits {}; + template struct bits : bits {}; + template struct bits : bits {}; + + #if HALF_ENABLE_CPP11_CSTDINT + /// Unsigned integer of (at least) 16 bits width. + typedef std::uint_least16_t uint16; + + /// Fastest unsigned integer of (at least) 32 bits width. + typedef std::uint_fast32_t uint32; + + /// Fastest signed integer of (at least) 32 bits width. + typedef std::int_fast32_t int32; + + /// Unsigned integer of (at least) 32 bits width. + template<> struct bits { typedef std::uint_least32_t type; }; + + /// Unsigned integer of (at least) 64 bits width. + template<> struct bits { typedef std::uint_least64_t type; }; + #else + /// Unsigned integer of (at least) 16 bits width. + typedef unsigned short uint16; + + /// Fastest unsigned integer of (at least) 32 bits width. + typedef unsigned long uint32; + + /// Fastest unsigned integer of (at least) 32 bits width. + typedef long int32; + + /// Unsigned integer of (at least) 32 bits width. + template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; + + #if HALF_ENABLE_CPP11_LONG_LONG + /// Unsigned integer of (at least) 64 bits width. + template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; + #else + /// Unsigned integer of (at least) 64 bits width. + template<> struct bits { typedef unsigned long type; }; + #endif + #endif + + #ifdef HALF_ARITHMETIC_TYPE + /// Type to use for arithmetic computations and mathematic functions internally. + typedef HALF_ARITHMETIC_TYPE internal_t; + #endif + + /// Tag type for binary construction. + struct binary_t {}; + + /// Tag for binary construction. + HALF_CONSTEXPR_CONST binary_t binary = binary_t(); + + /// \name Implementation defined classification and arithmetic + /// \{ + + /// Check for infinity. + /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if infinity + /// \retval false else + template bool builtin_isinf(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); + #elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); + #else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); + #endif + } + + /// Check for NaN. 
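+ /// (The last fallback relies on the IEEE 754 rule that only NaN compares
+ /// unequal to itself, so `arg != arg` detects NaN even when neither C++11
+ /// `std::isnan` nor the MSVC `_isnan` helper is available.)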
+ /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if not a number + /// \retval false else + template bool builtin_isnan(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); + #elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; + #else + return arg != arg; + #endif + } + + /// Check sign. + /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if signbit set + /// \retval false else + template bool builtin_signbit(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); + #else + return arg < T() || (arg == T() && T(1)/arg < T()); + #endif + } + + /// Platform-independent sign mask. + /// \param arg integer value in two's complement + /// \retval -1 if \a arg negative + /// \retval 0 if \a arg positive + inline uint32 sign_mask(uint32 arg) + { + static const int N = std::numeric_limits::digits - 1; + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> N; + #else + return -((arg>>N)&1); + #endif + } + + /// Platform-independent arithmetic right shift. + /// \param arg integer value in two's complement + /// \param i shift amount (at most 31) + /// \return \a arg right shifted for \a i bits with possible sign extension + inline uint32 arithmetic_shift(uint32 arg, int i) + { + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> i; + #else + return static_cast(arg)/(static_cast(1)<>(std::numeric_limits::digits-1))&1); + #endif + } + + /// \} + /// \name Error handling + /// \{ + + /// Internal exception flags. + /// \return reference to global exception flags + inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } + + /// Raise floating-point exception. + /// \param flags exceptions to raise + /// \param cond condition to raise exceptions for + inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) + { + #if HALF_ERRHANDLING + if(!cond) + return; + #if HALF_ERRHANDLING_FLAGS + errflags() |= flags; + #endif + #if HALF_ERRHANDLING_ERRNO + if(flags & FE_INVALID) + errno = EDOM; + else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW)) + errno = ERANGE; + #endif + #if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV + std::feraiseexcept(flags); + #endif + #ifdef HALF_ERRHANDLING_THROW_INVALID + if(flags & FE_INVALID) + throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID); + #endif + #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO + if(flags & FE_DIVBYZERO) + throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO); + #endif + #ifdef HALF_ERRHANDLING_THROW_OVERFLOW + if(flags & FE_OVERFLOW) + throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW + if(flags & FE_UNDERFLOW) + throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_INEXACT + if(flags & FE_INEXACT) + throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT); + #endif + #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT + if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #endif + } + + /// Check and signal for any NaN. 
+ /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \retval true if either \a x or \a y is NaN + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00); + #endif + return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; + } + + /// Signal and silence signaling NaN. + /// \param nan half-precision NaN value + /// \return quiet NaN + /// \exception FE_INVALID if \a nan is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, !(nan&0x200)); + #endif + return nan | 0x200; + } + + /// Signal and silence signaling NaNs. + /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \return quiet NaN + /// \exception FE_INVALID if \a x or \a y is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); + } + + /// Signal and silence signaling NaNs. + /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \param z third half-precision value to check + /// \return quiet NaN + /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); + } + + /// Select value or signaling NaN. + /// \param x preferred half-precision value + /// \param y ignored half-precision value except for signaling NaN + /// \return \a y if signaling NaN, \a x otherwise + /// \exception FE_INVALID if \a y is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) + { + #if HALF_ERRHANDLING + return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x; + #else + return x; + #endif + } + + /// Raise domain error and return NaN. + /// return quiet NaN + /// \exception FE_INVALID + inline HALF_CONSTEXPR_NOERR unsigned int invalid() + { + #if HALF_ERRHANDLING + raise(FE_INVALID); + #endif + return 0x7FFF; + } + + /// Raise pole error and return infinity. + /// \param sign half-precision value with sign bit only + /// \return half-precision infinity with sign of \a sign + /// \exception FE_DIVBYZERO + inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_DIVBYZERO); + #endif + return sign | 0x7C00; + } + + /// Check value for underflow. + /// \param arg non-zero half-precision value to check + /// \return \a arg + /// \exception FE_UNDERFLOW if arg is subnormal + inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) + { + #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + raise(FE_UNDERFLOW, !(arg&0x7C00)); + #endif + return arg; + } + + /// \} + /// \name Conversion and rounding + /// \{ + + /// Half-precision overflow. 
+ /// \tparam R rounding mode to use + /// \param sign half-precision value with sign bit only + /// \return rounded overflowing half-precision value + /// \exception FE_OVERFLOW + template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_OVERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) : + (R==std::round_toward_zero) ? (sign|0x7BFF) : + (sign|0x7C00); + } + + /// Half-precision underflow. + /// \tparam R rounding mode to use + /// \param sign half-precision value with sign bit only + /// \return rounded underflowing half-precision value + /// \exception FE_UNDERFLOW + template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_UNDERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) : + sign; + } + + /// Round half-precision number. + /// \tparam R rounding mode to use + /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results + /// \param value finite half-precision number to round + /// \param g guard bit (most significant discarded bit) + /// \param s sticky bit (or of all but the most significant discarded bits) + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) + { + #if HALF_ERRHANDLING + value += (R==std::round_to_nearest) ? (g&(s|value)) : + (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) : + (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0; + if((value&0x7C00) == 0x7C00) + raise(FE_OVERFLOW); + else if(value & 0x7C00) + raise(FE_INEXACT, I || (g|s)!=0); + else + raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0); + return value; + #else + return (R==std::round_to_nearest) ? (value+(g&(s|value))) : + (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) : + (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) : + value; + #endif + } + + /// Round half-precision number to nearest integer value. + /// \tparam R rounding mode to use + /// \tparam E `true` for round to even, `false` for round away from zero + /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it + /// \param value half-precision value to round + /// \return half-precision bits for nearest integral value + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded and \a I is `true` + template unsigned int integral(unsigned int value) + { + unsigned int abs = value & 0x7FFF; + if(abs < 0x3C00) + { + raise(FE_INEXACT, I); + return ((R==std::round_to_nearest) ? (0x3C00&-static_cast(abs>=(0x3800+E))) : + (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) : + (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast(value>0x8000)) : + 0) | (value&0x8000); + } + if(abs >= 0x6400) + return (abs>0x7C00) ? signal(value) : value; + unsigned int exp = 25 - (abs>>10), mask = (1<>exp)&E)) : + (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) : + (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) : + 0) + value) & ~mask; + } + + /// Convert fixed point to half-precision floating-point. 
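+ /// (Q1.F denotes one integer bit followed by F fraction bits, so with F = 15
+ /// the value 1.5 arrives as m = 0xC000; the bits shifted out of \a m supply
+ /// the guard and sticky inputs that rounded() uses to round correctly.)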
+ /// \tparam R rounding mode to use + /// \tparam F number of fractional bits in [11,31] + /// \tparam S `true` for signed, `false` for unsigned + /// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F + /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results + /// \param m mantissa in Q1.F fixed point format + /// \param exp biased exponent - 1 + /// \param sign half-precision value with sign bit only + /// \param s sticky bit (or of all but the most significant already discarded bits) + /// \return value converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) + { + if(S) + { + uint32 msign = sign_mask(m); + m = (m^msign) - msign; + sign = msign & 0x8000; + } + if(N) + for(; m<(static_cast(1)<(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast(1)<<(F-11-exp))-1))!=0)); + return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); + } + + /// Convert IEEE single-precision to half-precision. + /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). + /// \tparam R rounding mode to use + /// \param value single-precision value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(float value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), + (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT : + (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO : + (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF : + (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF : + _MM_FROUND_CUR_DIRECTION)); + #else + bits::type fbits; + std::memcpy(&fbits, &value, sizeof(float)); + #if 1 + unsigned int sign = (fbits>>16) & 0x8000; + fbits &= 0x7FFFFFFF; + if(fbits >= 0x7F800000) + return sign | 0x7C00 | ((fbits>0x7F800000) ? 
(0x200|((fbits>>13)&0x3FF)) : 0); + if(fbits >= 0x47800000) + return overflow(sign); + if(fbits >= 0x38800000) + return rounded(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0); + if(fbits >= 0x33000000) + { + int i = 125 - (fbits>>23); + fbits = (fbits&0x7FFFFF) | 0x800000; + return rounded(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast(1)<(sign); + return sign; + #else + static const uint16 base_table[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, + 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, + 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, + 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 
0xB400, 0xB800, 0xBC00, + 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 }; + static const unsigned char shift_table[256] = { + 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; + int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp]; + fbits &= 0x7FFFFF; + uint32 m = (fbits|((exp!=0)<<23)) & -static_cast(exp!=0xFF); + return rounded(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast(1)<<(i-1))-1)&m)!=0); + #endif + #endif + } + + /// Convert IEEE double-precision to half-precision. + /// \tparam R rounding mode to use + /// \param value double-precision value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(double value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + if(R == std::round_indeterminate) + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION)); + #endif + bits::type dbits; + std::memcpy(&dbits, &value, sizeof(double)); + uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF; + unsigned int sign = (hi>>16) & 0x8000; + hi &= 0x7FFFFFFF; + if(hi >= 0x7FF00000) + return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? 
(0x200|((hi>>10)&0x3FF)) : 0); + if(hi >= 0x40F00000) + return overflow(sign); + if(hi >= 0x3F100000) + return rounded(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0); + if(hi >= 0x3E600000) + { + int i = 1018 - (hi>>20); + hi = (hi&0xFFFFF) | 0x100000; + return rounded(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast(1)<(sign); + return sign; + } + + /// Convert non-IEEE floating-point to half-precision. + /// \tparam R rounding mode to use + /// \tparam T source type (builtin floating-point type) + /// \param value floating-point value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(T value, ...) + { + unsigned int hbits = static_cast(builtin_signbit(value)) << 15; + if(value == T()) + return hbits; + if(builtin_isnan(value)) + return hbits | 0x7FFF; + if(builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if(exp > 16) + return overflow(hbits); + if(exp < -13) + value = std::ldexp(value, 25); + else + { + value = std::ldexp(value, 12-exp); + hbits |= ((exp+13)<<10); + } + T ival, frac = std::modf(value, &ival); + int m = std::abs(static_cast(ival)); + return rounded(hbits+(m>>1), m&1, frac!=T()); + } + + /// Convert floating-point to half-precision. + /// \tparam R rounding mode to use + /// \tparam T source type (builtin floating-point type) + /// \param value floating-point value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half(T value) + { + return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); + } + + /// Convert integer to half-precision floating-point. + /// \tparam R rounding mode to use + /// \tparam T type to convert (builtin integer type) + /// \param value integral value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int int2half(T value) + { + unsigned int bits = static_cast(value<0) << 15; + if(!value) + return bits; + if(bits) + value = -value; + if(value > 0xFFFF) + return overflow(bits); + unsigned int m = static_cast(value), exp = 24; + for(; m<0x400; m<<=1,--exp) ; + for(; m>0x7FF; m>>=1,++exp) ; + bits |= (exp<<10) + m; + return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; + } + + /// Convert half-precision to IEEE single-precision. + /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 
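+ /// (Of the two non-F16C paths below, the disabled `#if 0` branch normalizes
+ /// subnormals with an explicit shift loop, while the enabled branch assembles
+ /// the single-precision bits from a precomputed 2048-entry mantissa table,
+ /// trading table memory for conversion speed.)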
+ /// \param value half-precision value to convert + /// \return single-precision value + inline float half2float_impl(unsigned int value, float, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); + #else + #if 0 + bits::type fbits = static_cast::type>(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + fbits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,fbits-=0x800000) ; + fbits += static_cast::type>(abs) << 13; + } + #else + static const bits::type mantissa_table[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, + 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, + 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, + 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, + 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, + 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, + 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, + 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, + 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, + 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, + 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, + 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 
0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, + 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, + 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, + 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, + 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, + 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, + 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, + 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, + 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, + 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, + 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, + 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, + 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 
0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, + 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, + 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, + 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, + 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, + 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, + 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, + 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, + 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, + 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, + 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, + 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, + 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 
0x384FC000, + 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, + 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, + 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, + 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, + 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, + 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, + 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, + 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, + 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, + 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, + 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, + 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, + 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 
0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, + 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, + 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, + 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, + 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, + 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, + 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, + 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, + 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, + 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, + 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, + 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, + 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 
0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, + 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, + 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, + 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, + 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, + 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, + 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, + 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, + 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, + 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, + 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, + 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, + 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 
0x3855C000, 0x3855E000, + 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, + 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, + 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, + 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, + 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, + 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, + 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, + 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, + 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, + 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, + 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, + 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, + 0x387A0000, 0x387A2000, 0x387A4000, 
0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
+ 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000,
+ 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
+ static const bits<float>::type exponent_table[64] = {
+ 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
+ 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000,
+ 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
+ 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
+ static const unsigned short offset_table[64] = {
+ 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+ 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
+ bits<float>::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
+ #endif
+ float out;
+ std::memcpy(&out, &fbits, sizeof(float));
+ return out;
+ #endif
+ }
+
+ /// Convert half-precision to IEEE double-precision.
+ /// \param value half-precision value to convert
+ /// \return double-precision value
+ inline double half2float_impl(unsigned int value, double, true_type)
+ {
+ #if HALF_ENABLE_F16C_INTRINSICS
+ return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value))));
+ #else
+ uint32 hi = static_cast<uint32>(value&0x8000) << 16;
+ unsigned int abs = value & 0x7FFF;
+ if(abs)
+ {
+ hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00);
+ for(; abs<0x400; abs<<=1,hi-=0x100000) ;
+ hi += static_cast<uint32>(abs) << 10;
+ }
+ bits<double>::type dbits = static_cast<bits<double>::type>(hi) << 32;
+ double out;
+ std::memcpy(&out, &dbits, sizeof(double));
+ return out;
+ #endif
+ }
+
+ /// Convert half-precision to non-IEEE floating-point.
+ /// \tparam T type to convert to (builtin floating-point type)
+ /// \param value half-precision value to convert
+ /// \return floating-point value
+ template<typename T> T half2float_impl(unsigned int value, T, ...)
+ {
+ T out;
+ unsigned int abs = value & 0x7FFF;
+ if(abs > 0x7C00)
+ out = (std::numeric_limits<T>::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits<T>::signaling_NaN() :
+ std::numeric_limits<T>::has_quiet_NaN ? std::numeric_limits<T>::quiet_NaN() : T();
+ else if(abs == 0x7C00)
+ out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
+ else if(abs > 0x3FF)
+ out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), (abs>>10)-25);
+ else
+ out = std::ldexp(static_cast<T>(abs), -24);
+ return (value&0x8000) ? -out : out;
+ }
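Both half-to-float strategies above (table lookup and direct bit manipulation) implement the same mapping. For orientation, here is the obvious portable reference via std::ldexp, equivalent in result though slower; this is a standalone illustration, not part of the header:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Reference conversion of an IEEE binary16 bit pattern to float.
static float half_bits_to_float(uint16_t h)
{
    unsigned abs = h & 0x7FFF;
    float out;
    if(abs > 0x7C00)                       // NaN payload
        out = std::numeric_limits<float>::quiet_NaN();
    else if(abs == 0x7C00)                 // infinity
        out = std::numeric_limits<float>::infinity();
    else if(abs > 0x3FF)                   // normal: restore the implicit leading bit
        out = std::ldexp(static_cast<float>((abs & 0x3FF) | 0x400), (abs >> 10) - 25);
    else                                   // subnormal or zero
        out = std::ldexp(static_cast<float>(abs), -24);
    return (h & 0x8000) ? -out : out;
}

int main()
{
    assert(half_bits_to_float(0x3C00) == 1.0f);                  // 1.0
    assert(half_bits_to_float(0xC000) == -2.0f);                 // -2.0
    assert(half_bits_to_float(0x0001) == std::ldexp(1.0f, -24)); // smallest subnormal
}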
+ /// Convert half-precision to floating-point.
+ /// \tparam T type to convert to (builtin floating-point type)
+ /// \param value half-precision value to convert
+ /// \return floating-point value
+ template<typename T> T half2float(unsigned int value)
+ {
+ return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>());
+ }
+
+ /// Convert half-precision floating-point to integer.
+ /// \tparam R rounding mode to use
+ /// \tparam E `true` for round to even, `false` for round away from zero
+ /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
+ /// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+ /// \param value half-precision value to convert
+ /// \return rounded integer value
+ /// \exception FE_INVALID if value is not representable in type \a T
+ /// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+ template<std::float_round_style R,bool E,bool I,typename T> T half2int(unsigned int value)
+ {
+ unsigned int abs = value & 0x7FFF;
+ if(abs >= 0x7C00)
+ {
+ raise(FE_INVALID);
+ return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
+ }
+ if(abs < 0x3800)
+ {
+ raise(FE_INEXACT, I);
+ return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) :
+ (R==std::round_toward_neg_infinity) ? -T(value>0x8000) :
+ T();
+ }
+ int exp = 25 - (abs>>10);
+ unsigned int m = (value&0x3FF) | 0x400;
+ int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+(
+ (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) :
+ (R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) :
+ (R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp));
+ if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 &&
+ ((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max()))))
+ raise(FE_INVALID);
+ else if(I && exp > 0 && (m&((1<<exp)-1)))
+ raise(FE_INEXACT);
+ return static_cast<T>((value&0x8000) ? -i : i);
+ }
+
+ /// \}
+ /// \name Mathematics
+ /// \{
+
+ /// upper part of 64-bit multiplication.
+ /// \tparam R rounding mode to use
+ /// \param x first factor
+ /// \param y second factor
+ /// \return upper 32 bit of \a x * \a y
+ template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y)
+ {
+ uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16);
+ return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) +
+ ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0);
+ }
+
+ /// 64-bit multiplication.
+ /// \param x first factor
+ /// \param y second factor
+ /// \return upper 32 bit of \a x * \a y rounded to nearest
+ inline uint32 multiply64(uint32 x, uint32 y)
+ {
+ #if HALF_ENABLE_CPP11_LONG_LONG
+ return static_cast<uint32>((static_cast<unsigned long long>(x)*static_cast<unsigned long long>(y)+0x80000000)>>32);
+ #else
+ return mulhi<std::round_to_nearest>(x, y);
+ #endif
+ }
+
+ /// 64-bit division.
+ /// \param x upper 32 bit of dividend
+ /// \param y divisor
+ /// \param s variable to store sticky bit for rounding
+ /// \return (\a x << 32) / \a y
+ inline uint32 divide64(uint32 x, uint32 y, int &s)
+ {
+ #if HALF_ENABLE_CPP11_LONG_LONG
+ unsigned long long xx = static_cast<unsigned long long>(x) << 32;
+ return s = (xx%y!=0), static_cast<uint32>(xx/y);
+ #else
+ y >>= 1;
+ uint32 rem = x, div = 0;
+ for(unsigned int i=0; i<32; ++i)
+ {
+ div <<= 1;
+ if(rem >= y)
+ {
+ rem -= y;
+ div |= 1;
+ }
+ rem <<= 1;
+ }
+ return s = rem > 1, div;
+ #endif
+ }
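To see what these Q-format helpers compute: multiply64 returns the high 32 bits of the 64-bit product, rounded to nearest, and divide64 returns (x << 32) / y together with a sticky bit recording a non-zero remainder. A minimal standalone sketch of the same idea using fixed-width types (names here are illustrative, not part of the library):

#include <cassert>
#include <cstdint>

// High 32 bits of a 32x32 -> 64 bit product, rounded to nearest (ties up).
static uint32_t mulhi_rn(uint32_t x, uint32_t y)
{
    uint64_t p = static_cast<uint64_t>(x) * y;
    return static_cast<uint32_t>((p + 0x80000000u) >> 32);
}

// (x << 32) / y, plus a sticky flag that records a non-zero remainder.
static uint32_t div64(uint32_t x, uint32_t y, int &sticky)
{
    uint64_t xx = static_cast<uint64_t>(x) << 32;
    sticky = (xx % y) != 0;
    return static_cast<uint32_t>(xx / y);
}

int main()
{
    // 0.5 * 0.5 = 0.25 in Q0.32: 0x80000000 * 0x80000000 -> 0x40000000.
    assert(mulhi_rn(0x80000000u, 0x80000000u) == 0x40000000u);
    int s;
    // (1 << 32) / 3 = 0x55555555 with a remainder, so the sticky bit is set.
    assert(div64(1u, 3u, s) == 0x55555555u && s == 1);
}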
+ /// Half precision positive modulus.
+ /// \tparam Q `true` to compute full quotient, `false` else
+ /// \tparam R `true` to compute signed remainder, `false` for positive remainder
+ /// \param x first operand as positive finite half-precision value
+ /// \param y second operand as positive finite half-precision value
+ /// \param quo address to store quotient at, `nullptr` if \a Q `false`
+ /// \return modulus of \a x / \a y
+ template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL)
+ {
+ unsigned int q = 0;
+ if(x > y)
+ {
+ int absx = x, absy = y, expx = 0, expy = 0;
+ for(; absx<0x400; absx<<=1,--expx) ;
+ for(; absy<0x400; absy<<=1,--expy) ;
+ expx += absx >> 10;
+ expy += absy >> 10;
+ int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+ for(int d=expx-expy; d; --d)
+ {
+ if(!Q && mx == my)
+ return 0;
+ if(mx >= my)
+ {
+ mx -= my;
+ q += Q;
+ }
+ mx <<= 1;
+ q <<= static_cast<int>(Q);
+ }
+ if(!Q && mx == my)
+ return 0;
+ if(mx >= my)
+ {
+ mx -= my;
+ ++q;
+ }
+ if(Q)
+ {
+ q &= (1<<(std::numeric_limits<int>::digits-1)) - 1;
+ if(!mx)
+ return *quo = q, 0;
+ }
+ for(; mx<0x400; mx<<=1,--expy) ;
+ x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy));
+ }
+ if(R)
+ {
+ unsigned int a, b;
+ if(y < 0x800)
+ {
+ a = (x<0x400) ? (x<<1) : (x+0x400);
+ b = y;
+ }
+ else
+ {
+ a = x;
+ b = y - 0x400;
+ }
+ if(a > b || (a == b && (q&1)))
+ {
+ int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF);
+ int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d));
+ for(; m<0x800 && exp>1; m<<=1,--exp) ;
+ x = 0x8000 + ((exp-1)<<10) + (m>>1);
+ q += Q;
+ }
+ }
+ if(Q)
+ *quo = q;
+ return x;
+ }
+
+ /// Fixed point square root.
+ /// \tparam F number of fractional bits
+ /// \param r radicand in Q1.F fixed point format
+ /// \param exp exponent
+ /// \return square root as Q1.F/2
+ template<unsigned int F> uint32 sqrt(uint32 &r, int &exp)
+ {
+ int i = exp & 1;
+ r <<= i;
+ exp = (exp-i) / 2;
+ uint32 m = 0;
+ for(uint32 bit=static_cast<uint32>(1)<<F; bit; bit>>=2)
+ {
+ if(r < m+bit)
+ m >>= 1;
+ else
+ {
+ r -= m + bit;
+ m = (m>>1) + bit;
+ }
+ }
+ return m;
+ }
+
+ /// Fixed point binary exponential.
+ /// This uses the BKM algorithm in E-mode.
+ /// \param m exponent in [0,1) as Q0.31
+ /// \param n number of iterations (at most 32)
+ /// \return 2 ^ \a m as Q1.31
+ inline uint32 exp2(uint32 m, unsigned int n = 32)
+ {
+ static const uint32 logs[] = {
+ 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+ 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+ 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+ 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+ if(!m)
+ return 0x80000000;
+ uint32 mx = 0x80000000, my = 0;
+ for(unsigned int i=1; i<n; ++i)
+ {
+ uint32 mz = my + logs[i];
+ if(mz <= m)
+ {
+ my = mz;
+ mx += mx >> i;
+ }
+ }
+ return mx;
+ }
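As a sanity check on the E-mode BKM iteration above (the logs[] entries are log2(1 + 2^-i) in Q0.31; e.g. logs[1] = 0x4AE00D1D is log2(1.5)), the same update can be run with ordinary doubles: accumulate the log terms while they still fit under the target exponent, multiplying the running result by (1 + 2^-i) each time. A minimal sketch, not library code:

#include <cmath>
#include <cstdio>

// BKM in E-mode with doubles: computes 2^x for x in [0,1).
static double bkm_exp2(double x, int n = 32)
{
    double acc = 0.0, result = 1.0;
    for(int i = 1; i < n; ++i)
    {
        double step = std::log2(1.0 + std::ldexp(1.0, -i)); // log2(1 + 2^-i)
        if(acc + step <= x)
        {
            acc += step;
            result += std::ldexp(result, -i); // result *= 1 + 2^-i
        }
    }
    return result;
}

int main()
{
    std::printf("%.9f vs %.9f\n", bkm_exp2(0.5), std::exp2(0.5)); // both ~1.414213562
}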
+ /// Fixed point binary logarithm.
+ /// This uses the BKM algorithm in L-mode.
+ /// \param m mantissa in [1,2) as Q1.30
+ /// \param n number of iterations (at most 32)
+ /// \return log2(\a m) as Q0.31
+ inline uint32 log2(uint32 m, unsigned int n = 32)
+ {
+ static const uint32 logs[] = {
+ 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+ 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+ 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+ 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+ if(m == 0x40000000)
+ return 0;
+ uint32 mx = 0x40000000, my = 0;
+ for(unsigned int i=1; i<n; ++i)
+ {
+ uint32 mz = mx + (mx>>i);
+ if(mz <= m)
+ {
+ mx = mz;
+ my += logs[i];
+ }
+ }
+ return my;
+ }
+
+ /// Fixed point sine and cosine.
+ /// This uses the CORDIC algorithm in rotation mode.
+ /// \param mz angle in [-pi/2,pi/2] as Q1.30
+ /// \param n number of iterations (at most 31)
+ /// \return sine and cosine of \a mz as Q1.30
+ inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31)
+ {
+ static const uint32 angles[] = {
+ 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
+ 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
+ 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
+ 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
+ uint32 mx = 0x26DD3B6A, my = 0;
+ for(unsigned int i=0; i<n; ++i)
+ {
+ uint32 sign = sign_mask(mz);
+ uint32 tx = mx - (arithmetic_shift(my, i)^sign) + sign;
+ uint32 ty = my + (arithmetic_shift(mx, i)^sign) - sign;
+ mx = tx; my = ty; mz -= (angles[i]^sign) - sign;
+ }
+ return std::make_pair(my, mx);
+ }
+
+ /// Trigonometric argument reduction for small arguments.
+ /// \param abs half-precision floating-point value
+ /// \param k value to take quarter period
+ /// \return \a abs reduced to [-pi/4,pi/4] as Q0.30
+ inline uint32 angle_arg(unsigned int abs, int &k)
+ {
+ uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10);
+ int exp = (abs>>10) + (abs<=0x3FF) - 15;
+ if(abs < 0x3A48)
+ return k = 0, m << (exp+20);
+ #if HALF_ENABLE_CPP11_LONG_LONG
+ unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi;
+ uint32 sign = -static_cast<uint32>(f>>63);
+ k = static_cast<int>(yi>>(62-exp));
+ return (multiply64(static_cast<uint32>((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign;
+ #else
+ uint32 yh = m*0xA2F98 + mulhi<std::round_toward_zero>(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF;
+ uint32 mask = (static_cast<uint32>(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast<uint32>(yi>yh);
+ k = static_cast<int>(yi>>(30-exp));
+ uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign;
+ return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign;
+ #endif
+ }
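The CORDIC rotation above can be mirrored in floating point to see the mechanics: rotate the vector (K, 0) by ±atan(2^-i) depending on the sign of the residual angle, where K ≈ 0.607253 (0x26DD3B6A as Q1.30) pre-compensates the accumulated gain. A minimal double-precision sketch of the same rotation mode, not the library's fixed-point code:

#include <cmath>
#include <cstdio>

// CORDIC in rotation mode: returns sin(z) and cos(z) for z in [-pi/2, pi/2].
static void cordic_sincos(double z, double &s, double &c, int n = 31)
{
    double x = 0.6072529350088812561694, y = 0.0; // gain compensation K
    for(int i = 0; i < n; ++i)
    {
        double d = (z >= 0.0) ? 1.0 : -1.0;        // rotate toward zero residual
        double tx = x - d * std::ldexp(y, -i);     // y * 2^-i
        double ty = y + d * std::ldexp(x, -i);     // x * 2^-i
        x = tx; y = ty;
        z -= d * std::atan(std::ldexp(1.0, -i));   // atan(2^-i), cf. angles[]
    }
    s = y; c = x;
}

int main()
{
    double s, c;
    cordic_sincos(0.5, s, c);
    std::printf("%g %g\n", s - std::sin(0.5), c - std::cos(0.5)); // both ~0
}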
+ /// Get arguments for atan2 function.
+ /// \param abs half-precision floating-point value
+ /// \return \a abs and sqrt(1 - \a abs^2) as Q0.30
+ inline std::pair<uint32,uint32> atan2_args(unsigned int abs)
+ {
+ int exp = -15;
+ for(; abs<0x400; abs<<=1,--exp) ;
+ exp += abs >> 10;
+ uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my;
+ int rexp = 2 * exp;
+ r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast<uint32>(1)<<-rexp)-1))!=0)) : 1);
+ for(rexp=0; r<0x40000000; r<<=1,--rexp) ;
+ uint32 mx = sqrt<30>(r, rexp);
+ int d = exp - rexp;
+ if(d < 0)
+ return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx);
+ if(d > 0)
+ return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx)));
+ return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx);
+ }
+
+ /// Get exponentials for hyperbolic computation.
+ /// \param abs half-precision floating-point value
+ /// \param exp variable to take unbiased exponent of larger result
+ /// \param n number of BKM iterations (at most 32)
+ /// \return exp(\a abs) and exp(-\a abs) as Q1.31 with same exponent
+ inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32)
+ {
+ uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my;
+ int e = (abs>>10) + (abs<=0x3FF);
+ if(e < 14)
+ {
+ exp = 0;
+ mx >>= 14 - e;
+ }
+ else
+ {
+ exp = mx >> (45-e);
+ mx = (mx<<(e-14)) & 0x7FFFFFFF;
+ }
+ mx = exp2(mx, n);
+ int d = exp << 1, s;
+ if(mx > 0x80000000)
+ {
+ my = divide64(0x80000000, mx, s);
+ my |= s;
+ ++d;
+ }
+ else
+ my = mx;
+ return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : (my!=0));
+ }
+
+ /// Postprocessing for binary exponential.
+ /// \tparam R rounding mode to use
+ /// \param m mantissa as Q1.31
+ /// \param exp absolute value of unbiased exponent
+ /// \param esign sign of the exponent
+ /// \param sign sign bit of result
+ /// \param n number of BKM iterations (at most 32)
+ /// \return value converted to half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if value had to be rounded
+ template<std::float_round_style R> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32)
+ {
+ if(esign)
+ {
+ exp = -exp - (m!=0);
+ if(exp < -25)
+ return underflow<R>(sign);
+ else if(exp == -25)
+ return rounded(sign, 1, m!=0);
+ }
+ else if(exp > 15)
+ return overflow<R>(sign);
+ if(!m)
+ return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp));
+ m = exp2(m, n);
+ int s = 0;
+ if(esign)
+ m = divide64(0x80000000, m, s);
+ return fixed2half(m, exp+14, sign, s);
+ }
+
+ /// Postprocessing for binary logarithm.
+ /// \tparam R rounding mode to use
+ /// \tparam L logarithm for base transformation as Q1.31
+ /// \param m fractional part of logarithm as Q0.31
+ /// \param ilog signed integer part of logarithm
+ /// \param exp biased exponent of result
+ /// \param sign sign bit of result
+ /// \return value base-transformed and converted to half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if no other exception occurred
+ template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0)
+ {
+ uint32 msign = sign_mask(ilog);
+ m = (((static_cast<uint32>(ilog)<<27)+(m>>4))^msign) - msign;
+ if(!m)
+ return 0;
+ for(; m<0x80000000; m<<=1,--exp) ;
+ int i = m >= L, s;
+ exp += i;
+ m >>= 1 + i;
+ sign ^= msign & 0x8000;
+ if(exp < -11)
+ return underflow<R>(sign);
+ m = divide64(m, L, s);
+ return fixed2half(m, exp, sign, 1);
+ }
+
+ /// Hypotenuse square root and postprocessing.
+ /// \tparam R rounding mode to use
+ /// \param r mantissa as Q2.30
+ /// \param exp biased exponent
+ /// \return square root converted to half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if value had to be rounded
+ template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp)
+ {
+ int i = r >> 31;
+ if((exp+=i) > 46)
+ return overflow<R>();
+ if(exp < -34)
+ return underflow<R>();
+ r = (r>>i) | (r&i);
+ uint32 m = sqrt<30>(r, exp+=15);
+ return fixed2half(m, exp-1, 0, r!=0);
+ }
+
+ /// Division and postprocessing for tangents.
+ /// \tparam R rounding mode to use
+ /// \param my dividend as Q1.31
+ /// \param mx divisor as Q1.31
+ /// \param exp biased exponent of result
+ /// \param sign sign bit of result
+ /// \return quotient converted to half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if no other exception occurred
+ template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0)
+ {
+ int i = my >= mx, s;
+ exp += i;
+ if(exp > 29)
+ return overflow<R>(sign);
+ if(exp < -11)
+ return underflow<R>(sign);
+ uint32 m = divide64(my>>(i+1), mx, s);
+ return fixed2half(m, exp, sign, s);
+ }
+
+ /// Area function and postprocessing.
+ /// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`.
+ /// \tparam R rounding mode to use
+ /// \tparam S `true` for asinh, `false` for acosh
+ /// \param arg half-precision argument
+ /// \return asinh|acosh(\a arg) converted to half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if no other exception occurred
+ template<std::float_round_style R,bool S> unsigned int area(unsigned int arg)
+ {
+ int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i;
+ uint32 mx = static_cast<uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r;
+ for(; abs<0x400; abs<<=1,--expy) ;
+ expy += abs >> 10;
+ r = ((abs&0x3FF)|0x400) << 5;
+ r *= r;
+ i = r >> 31;
+ expy = 2*expy + i;
+ r >>= i;
+ if(S)
+ {
+ if(expy < 0)
+ {
+ r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast<uint32>(1)<<-expy)-1))!=0)) : 1);
+ expy = 0;
+ }
+ else
+ {
+ r += 0x40000000 >> expy;
+ i = r >> 31;
+ r = (r>>i) | (r&i);
+ expy += i;
+ }
+ }
+ else
+ {
+ r -= 0x40000000 >> expy;
+ for(; r<0x40000000; r<<=1,--expy) ;
+ }
+ my = sqrt<30>(r, expy);
+ my = (my<<15) + (r<<14)/my;
+ if(S)
+ {
+ mx >>= expy - expx;
+ ilog = expy;
+ }
+ else
+ {
+ my >>= expx - expy;
+ ilog = expx;
+ }
+ my += mx;
+ i = my >> 31;
+ static const int G = S && (R==std::round_to_nearest);
+ return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15));
+ }
+
+ /// Class for 1.31 unsigned floating-point computation.
+ struct f31
+ {
+ /// Constructor.
+ /// \param mant mantissa as 1.31
+ /// \param e exponent
+ HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {}
+
+ /// Constructor.
+ /// \param abs unsigned half-precision value
+ f31(unsigned int abs) : exp(-15)
+ {
+ for(; abs<0x400; abs<<=1,--exp) ;
+ m = static_cast<uint32>((abs&0x3FF)|0x400) << 21;
+ exp += (abs>>10);
+ }
+
+ /// Addition operator.
+ /// \param a first operand
+ /// \param b second operand
+ /// \return \a a + \a b
+ friend f31 operator+(f31 a, f31 b)
+ {
+ if(b.exp > a.exp)
+ std::swap(a, b);
+ int d = a.exp - b.exp;
+ uint32 m = a.m + ((d<32) ? (b.m>>d) : 0);
+ int i = (m&0xFFFFFFFF) < a.m;
+ return f31(((m+i)>>i)|0x80000000, a.exp+i);
+ }
+
+ /// Subtraction operator.
+ /// \param a first operand
+ /// \param b second operand
+ /// \return \a a - \a b
+ friend f31 operator-(f31 a, f31 b)
+ {
+ int d = a.exp - b.exp, exp = a.exp;
+ uint32 m = a.m - ((d<32) ? (b.m>>d) : 0);
+ if(!m)
+ return f31(0, -32);
+ for(; m<0x80000000; m<<=1,--exp) ;
+ return f31(m, exp);
+ }
+
+ /// Multiplication operator.
+ /// \param a first operand
+ /// \param b second operand
+ /// \return \a a * \a b
+ friend f31 operator*(f31 a, f31 b)
+ {
+ uint32 m = multiply64(a.m, b.m);
+ int i = m >> 31;
+ return f31(m<<(1-i), a.exp + b.exp + i);
+ }
+
+ /// Division operator.
+ /// \param a first operand
+ /// \param b second operand
+ /// \return \a a / \a b
+ friend f31 operator/(f31 a, f31 b)
+ {
+ int i = a.m >= b.m, s;
+ uint32 m = divide64((a.m+i)>>i, b.m, s);
+ return f31(m, a.exp - b.exp + i - 1);
+ }
+
+ uint32 m; ///< mantissa as 1.31.
+ int exp; ///< exponent.
+ };
+
+ /// Error function and postprocessing.
+ /// This computes the value directly in Q1.31 using the approximations given
+ /// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions).
+ /// \tparam R rounding mode to use
+ /// \tparam C `true` for complementary error function, `false` else
+ /// \param arg half-precision function argument
+ /// \return approximated value of error function in half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if no other exception occurred
+ template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg)
+ {
+ unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
+ f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t;
+ f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t /
+ ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 30), x2.m>>(31-x2.exp)));
+ return (!C || sign) ? fixed2half(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) :
+ (e.exp<-25) ? underflow<R>() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1);
+ }
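The erf routine above evaluates a polynomial in t = 1/(1 + p·x), scaled by exp(-x²), which is the classic Abramowitz–Stegun family of approximations. The same scheme in double precision, using the published 7.1.26 coefficients (an illustration of the technique, not the library's fixed-point constants):

#include <cmath>
#include <cstdio>

// Abramowitz & Stegun 7.1.26: erf(x) ~ 1 - (a1 t + ... + a5 t^5) e^{-x^2},
// t = 1/(1 + p x); maximum absolute error about 1.5e-7.
static double erf_approx(double x)
{
    const double p = 0.3275911;
    const double a1 = 0.254829592, a2 = -0.284496736, a3 = 1.421413741,
                 a4 = -1.453152027, a5 = 1.061405429;
    double s = (x < 0) ? -1.0 : 1.0, ax = std::fabs(x);
    double t = 1.0 / (1.0 + p * ax);
    double poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
    return s * (1.0 - poly * std::exp(-ax * ax));
}

int main()
{
    std::printf("%.7f vs %.7f\n", erf_approx(1.0), std::erf(1.0)); // ~0.8427008
}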
+ /// Gamma function and postprocessing.
+ /// This approximates the value of either the gamma function or its logarithm directly in Q1.31.
+ /// \tparam R rounding mode to use
+ /// \tparam L `true` for logarithm of gamma function, `false` for gamma function
+ /// \param arg half-precision floating-point value
+ /// \return lgamma/tgamma(\a arg) in half-precision
+ /// \exception FE_OVERFLOW on overflows
+ /// \exception FE_UNDERFLOW on underflows
+ /// \exception FE_INEXACT if \a arg is not a positive integer
+ template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg)
+ {
+ /* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
+ double t = arg + 4.65, s = p[0];
+ for(unsigned int i=0; i<5; ++i)
+ s += p[i+1] / (arg+i);
+ return std::log(s) + (arg-0.5)*std::log(t) - t;
+ */
+ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
+ unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
+ bool bsign = sign != 0;
+ f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s =
+ f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1)) +
+ f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1));
+ int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16);
+ s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe;
+ if(x.exp != -1 || x.m != 0x80000000)
+ {
+ i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8);
+ f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe;
+ s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l);
+ }
+ s = x.exp ? (s-t) : (t-s);
+ if(bsign)
+ {
+ if(z.exp >= 0)
+ {
+ sign &= (L|((z.m>>(31-z.exp))&1)) - 1;
+ for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ;
+ }
+ if(z.exp == -1)
+ z = f31(0x80000000, 0) - z;
+ if(z.exp < -1)
+ {
+ z = z * pi;
+ z.m = sincos(z.m>>(1-z.exp), 30).first;
+ for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ;
+ }
+ else
+ z = f31(0x80000000, 0);
+ }
+ if(L)
+ {
+ if(bsign)
+ {
+ f31 l(0x92868247, 0);
+ if(z.exp < 0)
+ {
+ uint32 m = log2((z.m+1)>>1, 27);
+ z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5);
+ for(; z.m<0x80000000; z.m<<=1,--z.exp) ;
+ l = l + z / lbe;
+ }
+ sign = static_cast(x.exp&&(l.exp(x.exp==0) << 15;
+ if(s.exp < -24)
+ return underflow<R>(sign);
+ if(s.exp > 15)
+ return overflow<R>(sign);
+ }
+ }
+ else
+ {
+ s = s * lbe;
+ uint32 m;
+ if(s.exp < 0)
+ {
+ m = s.m >> -s.exp;
+ s.exp = 0;
+ }
+ else
+ {
+ m = (s.m<<s.exp)&0x7FFFFFFF;
+ s.exp = (s.m>>(31-s.exp));
+ }
+ s.m = exp2(m, 27);
+ if(!x.exp)
+ s = f31(0x80000000, 0) / s;
+ if(bsign)
+ {
+ if(z.exp < 0)
+ s = s * z;
+ s = pi / s;
+ if(s.exp < -24)
+ return underflow<R>(sign);
+ }
+ else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1)))
+ return ((s.exp+14)<<10) + (s.m>>21);
+ if(s.exp > 15)
+ return overflow<R>(sign);
+ }
+ return fixed2half(s.m, s.exp+14, sign);
+ }
+ /// \}
+
+ template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster;
+ }
+
+ /// Half-precision floating-point type.
+ /// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic
+ /// operators and conversions. It is implicitly convertible to single-precision floating-point, which makes arithmetic
+ /// expressions and functions with mixed-type operands evaluate in the most precise operand type.
+ ///
+ /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
+ /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
+ /// means it can be standard-conformantly copied using raw binary copies. In this context a few more words about the
+ /// actual size of the type are in order. Although the half represents an IEEE 16-bit type, it does not necessarily have
+ /// to be exactly 16 bits in size. On any reasonable implementation, however, the actual binary representation of this
+ /// type will most probably not involve any additional "magic" or padding beyond the simple binary representation of the
+ /// underlying 16-bit IEEE number, even if not strictly guaranteed by the standard. It only has an actual size of 16 bits,
+ /// though, if your C++ implementation supports an unsigned integer type of exactly 16 bits width, which should be the
+ /// case on nearly any reasonable platform.
+ ///
+ /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
+ /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+ class half
+ {
+ public:
+ /// \name Construction and assignment
+ /// \{
+
+ /// Default constructor.
+ /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
+ /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+ HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
+
+ /// Conversion constructor.
+ /// \param rhs float to convert
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
+
+ /// Conversion to single-precision.
+ /// \return single-precision value representing the expression value
+ operator float() const { return detail::half2float<float>(data_); }
+
+ /// Assignment operator.
+ /// \param rhs single-precision value to copy from
+ /// \return reference to this half
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ half& operator=(float rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
+
+ /// \}
+ /// \name Arithmetic updates
+ /// \{
+
+ /// Arithmetic assignment.
+ /// \param rhs half to add
+ /// \return reference to this half
+ /// \exception FE_... according to operator+(half,half)
+ half& operator+=(half rhs) { return *this = *this + rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs half to subtract
+ /// \return reference to this half
+ /// \exception FE_... according to operator-(half,half)
+ half& operator-=(half rhs) { return *this = *this - rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs half to multiply with
+ /// \return reference to this half
+ /// \exception FE_... according to operator*(half,half)
+ half& operator*=(half rhs) { return *this = *this * rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs half to divide by
+ /// \return reference to this half
+ /// \exception FE_... according to operator/(half,half)
+ half& operator/=(half rhs) { return *this = *this / rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs single-precision value to add
+ /// \return reference to this half
+ /// \exception FE_... according to operator=()
+ half& operator+=(float rhs) { return *this = *this + rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs single-precision value to subtract
+ /// \return reference to this half
+ /// \exception FE_... according to operator=()
+ half& operator-=(float rhs) { return *this = *this - rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs single-precision value to multiply with
+ /// \return reference to this half
+ /// \exception FE_... according to operator=()
+ half& operator*=(float rhs) { return *this = *this * rhs; }
+
+ /// Arithmetic assignment.
+ /// \param rhs single-precision value to divide by
+ /// \return reference to this half
+ /// \exception FE_... according to operator=()
+ half& operator/=(float rhs) { return *this = *this / rhs; }
+
+ /// \}
+ /// \name Increment and decrement
+ /// \{
+
+ /// Prefix increment.
+ /// \return incremented half value
+ /// \exception FE_... according to operator+(half,half)
+ half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
+
+ /// Prefix decrement.
+ /// \return decremented half value
+ /// \exception FE_... according to operator-(half,half)
+ half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
+
+ /// Postfix increment.
+ /// \return non-incremented half value
+ /// \exception FE_... according to operator+(half,half)
+ half operator++(int) { half out(*this); ++*this; return out; }
+
+ /// Postfix decrement.
+ /// \return non-decremented half value
+ /// \exception FE_... according to operator-(half,half)
+ half operator--(int) { half out(*this); --*this; return out; }
+ /// \}
+
+ private:
+ /// Rounding mode to use
+ static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+ /// Constructor.
+ /// \param bits binary representation to set half to
+ HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast<detail::uint16>(bits)) {}
+
+ /// Internal binary representation
+ detail::uint16 data_;
+
+ #ifndef HALF_DOXYGEN_ONLY
+ friend HALF_CONSTEXPR_NOERR bool operator==(half, half);
+ friend HALF_CONSTEXPR_NOERR bool operator!=(half, half);
+ friend HALF_CONSTEXPR_NOERR bool operator<(half, half);
+ friend HALF_CONSTEXPR_NOERR bool operator>(half, half);
+ friend HALF_CONSTEXPR_NOERR bool operator<=(half, half);
+ friend HALF_CONSTEXPR_NOERR bool operator>=(half, half);
+ friend HALF_CONSTEXPR half operator-(half);
+ friend half operator+(half, half);
+ friend half operator-(half, half);
+ friend half operator*(half, half);
+ friend half operator/(half, half);
+ template<typename charT,typename traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
+ template<typename charT,typename traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
+ friend HALF_CONSTEXPR half fabs(half);
+ friend half fmod(half, half);
+ friend half remainder(half, half);
+ friend half remquo(half, half, int*);
+ friend half fma(half, half, half);
+ friend HALF_CONSTEXPR_NOERR half fmax(half, half);
+ friend HALF_CONSTEXPR_NOERR half fmin(half, half);
+ friend half fdim(half, half);
+ friend half nanh(const char*);
+ friend half exp(half);
+ friend half exp2(half);
+ friend half expm1(half);
+ friend half log(half);
+ friend half log10(half);
+ friend half log2(half);
+ friend half log1p(half);
+ friend half sqrt(half);
+ friend half rsqrt(half);
+ friend half cbrt(half);
+ friend half hypot(half, half);
+ friend half hypot(half, half, half);
+ friend half pow(half, half);
+ friend void sincos(half, half*, half*);
+ friend half sin(half);
+ friend half cos(half);
+ friend half tan(half);
+ friend half asin(half);
+ friend half acos(half);
+ friend half atan(half);
+ friend half atan2(half, half);
+ friend half sinh(half);
+ friend half cosh(half);
+ friend half tanh(half);
+ friend half asinh(half);
+ friend half acosh(half);
+ friend half atanh(half);
+ friend half erf(half);
+ friend half erfc(half);
+ friend half lgamma(half);
+ friend half tgamma(half);
+ friend half ceil(half);
+ friend half floor(half);
+ friend half trunc(half);
+ friend half round(half);
+ friend long lround(half);
+ friend half rint(half);
+ friend long lrint(half);
+ friend half nearbyint(half);
+ #if HALF_ENABLE_CPP11_LONG_LONG
+ friend long long llround(half);
+ friend long long llrint(half);
+ #endif
+ friend half frexp(half, int*);
+ friend half scalbln(half, long);
+ friend half modf(half, half*);
+ friend int ilogb(half);
+ friend half logb(half);
+ friend half nextafter(half, half);
+ friend half nexttoward(half, long double);
+ friend HALF_CONSTEXPR half copysign(half, half);
+ friend HALF_CONSTEXPR int fpclassify(half);
+ friend HALF_CONSTEXPR bool isfinite(half);
+ friend HALF_CONSTEXPR bool isinf(half);
+ friend HALF_CONSTEXPR bool isnan(half);
+ friend HALF_CONSTEXPR bool isnormal(half);
+ friend HALF_CONSTEXPR bool signbit(half);
+ friend HALF_CONSTEXPR bool isgreater(half, half);
+ friend HALF_CONSTEXPR bool isgreaterequal(half, half);
+ friend HALF_CONSTEXPR bool isless(half, half);
+ friend HALF_CONSTEXPR bool islessequal(half, half);
+ friend HALF_CONSTEXPR bool islessgreater(half, half);
+ template<typename,typename,std::float_round_style> friend struct detail::half_caster;
+ friend class std::numeric_limits<half>;
+ #if HALF_ENABLE_CPP11_HASH
+ friend struct std::hash<half>;
+ #endif
+ #if HALF_ENABLE_CPP11_USER_LITERALS
+ friend half literal::operator "" _h(long double);
+ #endif
+ #endif
+ };
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+ namespace literal
+ {
+ /// Half literal.
+ /// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant
+ /// expressions due to the rather involved conversion. So don't expect this to be evaluated at compile time; the
+ /// conversion still happens at runtime. It is a convenience feature, not a performance optimization.
+ /// \param value literal value
+ /// \return half with the given value (possibly rounded)
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
+ }
+#endif
+
+ namespace detail
+ {
+ /// Helper class for half casts.
+ /// This class template has to be specialized for all valid cast arguments to define an appropriate static
+ /// `cast` member function and a corresponding `type` member denoting its return type.
+ /// \tparam T destination type
+ /// \tparam U source type
+ /// \tparam R rounding mode to use
+ template<typename T,typename U,std::float_round_style R> struct half_caster {};
+ template<typename U,std::float_round_style R> struct half_caster<half,U,R>
+ {
+ #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+ static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+ #endif
+
+ static half cast(U arg) { return cast_impl(arg, is_float<U>()); }
+
+ private:
+ static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
+ static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
+ };
+ template<typename T,std::float_round_style R> struct half_caster<T,half,R>
+ {
+ #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+ static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+ #endif
+
+ static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
+
+ private:
+ static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
+ static T cast_impl(half arg, false_type) { return half2int<R,true,true,T>(arg.data_); }
+ };
+ template<std::float_round_style R> struct half_caster<half,half,R>
+ {
+ static half cast(half arg) { return arg; }
+ };
+ }
+}
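A brief usage sketch of the pieces defined above: construction and implicit float conversion on the half type, the _h literal, and the half_cast front-end (declared elsewhere in this header) that dispatches to half_caster with an explicit rounding mode. The include path and main() are assumptions for illustration:

#include <iostream>
#include "half.hpp"

using half_float::half;
using half_float::half_cast;

int main()
{
    half a(3.5f), b(1.25f);       // explicit construction from float
    half c = a * b + half(0.5f);  // arithmetic performed in half precision
    float f = c;                  // implicit conversion back to float

    // half_cast selects the rounding mode at compile time via half_caster.
    int i = half_cast<int, std::round_to_nearest>(c);

    std::cout << c << " " << f << " " << i << "\n"; // 4.875 4.875 5
}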
+/// Extensions to the C++ standard library.
+namespace std
+{
+ /// Numeric limits for half-precision floats.
+ /// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
+ template<> class numeric_limits<half_float::half>
+ {
+ public:
+ /// Is template specialization.
+ static HALF_CONSTEXPR_CONST bool is_specialized = true;
+
+ /// Supports signed values.
+ static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+ /// Is not an integer type.
+ static HALF_CONSTEXPR_CONST bool is_integer = false;
+
+ /// Is not exact.
+ static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+ /// Doesn't provide modulo arithmetic.
+ static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+ /// Has a finite set of values.
+ static HALF_CONSTEXPR_CONST bool is_bounded = true;
+
+ /// IEEE conformant.
+ static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+ /// Supports infinity.
+ static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+ /// Supports quiet NaNs.
+ static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+ /// Supports signaling NaNs.
+ static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true;
+
+ /// Supports subnormal values.
+ static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+ /// Supports no denormalization detection.
+ static HALF_CONSTEXPR_CONST bool has_denorm_loss = false;
+
+ #if HALF_ERRHANDLING_THROWS
+ static HALF_CONSTEXPR_CONST bool traps = true;
+ #else
+ /// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is activated.
+ static HALF_CONSTEXPR_CONST bool traps = false;
+ #endif
+
+ /// Does not support pre-rounding underflow detection.
+ static HALF_CONSTEXPR_CONST bool tinyness_before = false;
+
+ /// Rounding mode.
+ static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style;
+
+ /// Significant digits.
+ static HALF_CONSTEXPR_CONST int digits = 11;
+
+ /// Significant decimal digits.
+ static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+ /// Required decimal digits to represent all possible values.
+ static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+ /// Number base.
+ static HALF_CONSTEXPR_CONST int radix = 2;
+
+ /// One more than smallest exponent.
+ static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+ /// Smallest normalized representable power of 10.
+ static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+ /// One more than largest exponent.
+ static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+ /// Largest finitely representable power of 10.
+ static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+ /// Smallest positive normal value.
+ static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); }
+
+ /// Smallest finite value.
+ static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); }
+
+ /// Largest finite value.
+ static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); }
+
+ /// Difference between 1 and next representable value.
+ static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); }
+
+ /// Maximum rounding error in ULP (units in the last place).
+ static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
+ { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
+
+ /// Positive infinity.
+ static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); }
+
+ /// Quiet NaN.
+ static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); }
+
+ /// Signaling NaN.
+ static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); }
+
+ /// Smallest positive subnormal value.
+ static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); }
+ };
+
+#if HALF_ENABLE_CPP11_HASH
+ /// Hash function for half-precision floats.
+ /// This is only defined if C++11 `std::hash` is supported and enabled.
+ ///
+ /// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash)
+ template<> struct hash<half_float::half>
+ {
+ /// Type of function argument.
+ typedef half_float::half argument_type;
+
+ /// Function return type.
+ typedef size_t result_type;
+
+ /// Compute hash function.
+ /// \param arg half to hash
+ /// \return hash value
+ result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
+ };
+#endif
+}
+
+namespace half_float
+{
+ /// \anchor compop
+ /// \name Comparison operators
+ /// \{
+
+ /// Comparison for equality.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if operands equal
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y)
+ {
+ return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
+ }
+
+ /// Comparison for inequality.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if operands not equal
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y)
+ {
+ return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF));
+ }
+
+ /// Comparison for less than.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less than \a y
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y)
+ {
+ return !detail::compsignal(x.data_, y.data_) &&
+ ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+ }
+
+ /// Comparison for greater than.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater than \a y
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y)
+ {
+ return !detail::compsignal(x.data_, y.data_) &&
+ ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+ }
+
+ /// Comparison for less equal.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less equal \a y
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y)
+ {
+ return !detail::compsignal(x.data_, y.data_) &&
+ ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+ }
+
+ /// Comparison for greater equal.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater equal \a y
+ /// \retval false else
+ /// \exception FE_INVALID if \a x or \a y is NaN
+ inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y)
+ {
+ return !detail::compsignal(x.data_, y.data_) &&
+ ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+ }
+
+ /// \}
+ /// \anchor arithmetics
+ /// \name Arithmetic operators
+ /// \{
+
+ /// Identity.
+ /// \param arg operand
+ /// \return unchanged operand
+ inline HALF_CONSTEXPR half operator+(half arg) { return arg; }
+
+ /// Negation.
+ /// \param arg operand
+ /// \return negated operand
+ inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
+
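All six comparison operators above rely on the same trick: the expression (x^(0x8000|(0x8000-(x>>15))))+(x>>15) maps a sign-magnitude half bit pattern to an integer whose natural ordering matches the floating-point ordering, with -0 and +0 mapping to the same key. A standalone sketch of that mapping (illustrative, not library code):

#include <cassert>
#include <cstdint>

// Map a sign-magnitude binary16 pattern to a totally ordered integer key
// (NaNs excluded): flip the sign bit for positives, all bits plus one for negatives.
static unsigned ordered_key(uint16_t h)
{
    unsigned x = h;
    return ((x ^ (0x8000u | (0x8000u - (x >> 15)))) + (x >> 15)) & 0xFFFFu;
}

int main()
{
    const uint16_t neg_one = 0xBC00, neg_zero = 0x8000, pos_zero = 0x0000,
                   one = 0x3C00, two = 0x4000;
    assert(ordered_key(neg_one) < ordered_key(neg_zero));
    assert(ordered_key(neg_zero) == ordered_key(pos_zero)); // -0 and +0 compare equal
    assert(ordered_key(pos_zero) < ordered_key(one));
    assert(ordered_key(one) < ordered_key(two));
}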
+ /// Addition.
+ /// This operation is exact to rounding for all rounding modes.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return sum of half expressions
+ /// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half operator+(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+ return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)+detail::half2float<detail::internal_t>(y.data_)));
+ #else
+ int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;
+ bool sub = ((x.data_^y.data_)&0x8000) != 0;
+ if(absx >= 0x7C00 || absy >= 0x7C00)
+ return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
+ (sub && absx==0x7C00) ? detail::invalid() : y.data_);
+ if(!absx)
+ return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
+ if(!absy)
+ return x;
+ unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;
+ if(absy > absx)
+ std::swap(absx, absy);
+ int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;
+ if(d < 13)
+ {
+ my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
+ my = (my>>d) | ((my&((1<<d)-1))!=0);
+ }
+ else
+ my = 1;
+ if(sub)
+ {
+ mx -= my;
+ if(!mx)
+ return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+ for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;
+ }
+ else
+ {
+ mx += my;
+ int i = mx >> 14;
+ if((exp+=i) > 30)
+ return half(detail::binary, detail::overflow<half::round_style>(sign));
+ mx = (mx>>i) | (mx&i);
+ }
+ return half(detail::binary, detail::rounded(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));
+ #endif
+ }
+
+ /// Subtraction.
+ /// This operation is exact to rounding for all rounding modes.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return difference of half expressions
+ /// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half operator-(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+ return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)-detail::half2float<detail::internal_t>(y.data_)));
+ #else
+ return x + -y;
+ #endif
+ }
+
+ /// Multiplication.
+ /// This operation is exact to rounding for all rounding modes.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return product of half expressions
+ /// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half operator*(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+ return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)*detail::half2float<detail::internal_t>(y.data_)));
+ #else
+ int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
+ unsigned int sign = (x.data_^y.data_) & 0x8000;
+ if(absx >= 0x7C00 || absy >= 0x7C00)
+ return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+ ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
+ if(!absx || !absy)
+ return half(detail::binary, sign);
+ for(; absx<0x400; absx<<=1,--exp) ;
+ for(; absy<0x400; absy<<=1,--exp) ;
+ detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+ int i = m >> 21, s = m & i;
+ exp += (absx>>10) + (absy>>10) + i;
+ if(exp > 29)
+ return half(detail::binary, detail::overflow<half::round_style>(sign));
+ else if(exp < -11)
+ return half(detail::binary, detail::underflow<half::round_style>(sign));
+ return half(detail::binary, detail::fixed2half(m>>i, exp, sign, s));
+ #endif
+ }
+
+ /// Division.
+ /// This operation is exact to rounding for all rounding modes.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return quotient of half expressions
+ /// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
+ /// \exception FE_DIVBYZERO if dividing finite value by 0
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half operator/(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+ return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)/detail::half2float<detail::internal_t>(y.data_)));
+ #else
+ int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
+ unsigned int sign = (x.data_^y.data_) & 0x8000;
+ if(absx >= 0x7C00 || absy >= 0x7C00)
+ return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+ (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
+ if(!absx)
+ return half(detail::binary, absy ? sign : detail::invalid());
+ if(!absy)
+ return half(detail::binary, detail::pole<half::round_style>(sign));
+ for(; absx<0x400; absx<<=1,--exp) ;
+ for(; absy<0x400; absy<<=1,++exp) ;
+ detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+ int i = mx < my;
+ exp += (absx>>10) - (absy>>10) - i;
+ if(exp > 29)
+ return half(detail::binary, detail::overflow<half::round_style>(sign));
+ else if(exp < -11)
+ return half(detail::binary, detail::underflow<half::round_style>(sign));
+ mx <<= 12 + i;
+ my <<= 1;
+ return half(detail::binary, detail::fixed2half(mx/my, exp, sign, mx%my!=0));
+ #endif
+ }
+
+ /// \}
+ /// \anchor streaming
+ /// \name Input and output
+ /// \{
+
+ /// Output operator.
+ /// This uses the built-in functionality for streaming out floating-point numbers.
+ /// \param out output stream to write into
+ /// \param arg half expression to write
+ /// \return reference to output stream
+ template<typename charT,typename traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+ return out << detail::half2float<detail::internal_t>(arg.data_);
+ #else
+ return out << detail::half2float<float>(arg.data_);
+ #endif
+ }
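operator/ above reduces half division to a single integer division: both significands are normalized to [0x400, 0x800), the dividend is pre-shifted so the quotient keeps a known number of bits, and the remainder is folded into a sticky bit for correct rounding. The same idea in isolation (illustrative, not library code):

#include <cassert>
#include <cstdint>

// Divide two normalized 11-bit significands (values in [0x400, 0x800)).
// Returns a wide fixed-point quotient plus a sticky bit for rounding.
static uint32_t divide_sig(uint32_t mx, uint32_t my, int &sticky)
{
    int small = mx < my;        // quotient would lose its leading bit
    mx <<= 12 + small;          // pre-shift so the quotient keeps full precision
    my <<= 1;
    sticky = (mx % my) != 0;    // any remainder makes the result inexact
    return mx / my;
}

int main()
{
    int s;
    // 1.0 / 1.5: significands 0x400 and 0x600 -> quotient 0xAAA (two thirds), inexact.
    uint32_t q = divide_sig(0x400, 0x600, s);
    assert(s == 1 && q == 0xAAA);
}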
+
+ /// Input operator.
+ /// This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating
+ /// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first
+ /// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded
+ /// to half-precision using the library's half-precision rounding mode.
+ /// \param in input stream to read from
+ /// \param arg half to read into
+ /// \return reference to input stream
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ template<typename charT,class traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     detail::internal_t f;
+ #else
+     double f;
+ #endif
+     if(in >> f)
+         arg.data_ = detail::float2half(f);
+     return in;
+ }
+
+ /// \}
+ /// \anchor basic
+ /// \name Basic mathematical operations
+ /// \{
+
+ /// Absolute value.
+ /// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+ /// \param arg operand
+ /// \return absolute value of \a arg
+ inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
+
+ /// Absolute value.
+ /// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+ /// \param arg operand
+ /// \return absolute value of \a arg
+ inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); }
+
+ /// Remainder of division.
+ /// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
+ /// \param x first operand
+ /// \param y second operand
+ /// \return remainder of floating-point division
+ /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+ inline half fmod(half x, half y)
+ {
+     unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+     if(absx >= 0x7C00 || absy >= 0x7C00)
+         return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+             (absx==0x7C00) ? detail::invalid() : x.data_);
+     if(!absy)
+         return half(detail::binary, detail::invalid());
+     if(!absx)
+         return x;
+     if(absx == absy)
+         return half(detail::binary, sign);
+     return half(detail::binary, sign|detail::mod(absx, absy));
+ }
+
+ /// Remainder of division.
+ /// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
+ /// \param x first operand
+ /// \param y second operand
+ /// \return remainder of floating-point division
+ /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+ inline half remainder(half x, half y)
+ {
+     unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+     if(absx >= 0x7C00 || absy >= 0x7C00)
+         return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+             (absx==0x7C00) ? detail::invalid() : x.data_);
+     if(!absy)
+         return half(detail::binary, detail::invalid());
+     if(absx == absy)
+         return half(detail::binary, sign);
+     return half(detail::binary, sign^detail::mod(absx, absy));
+ }
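+
+ // Illustrative sketch (not upstream code): fmod keeps the sign of x, while
+ // remainder rounds the quotient to nearest, so the two can disagree:
+ //
+ //     half m = fmod(half(5.5f), half(2.0f));       // 1.5, sign of x
+ //     half r = remainder(half(5.5f), half(2.0f));  // -0.5, 5.5/2 rounds to 3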
+
+ /// Remainder of division.
+ /// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
+ /// \param x first operand
+ /// \param y second operand
+ /// \param quo address to store some bits of quotient at
+ /// \return remainder of floating-point division
+ /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+ inline half remquo(half x, half y, int *quo)
+ {
+     unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
+     if(absx >= 0x7C00 || absy >= 0x7C00)
+         return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+             (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
+     if(!absy)
+         return half(detail::binary, detail::invalid());
+     bool qsign = ((value^y.data_)&0x8000) != 0;
+     int q = 1;
+     if(absx != absy)
+         value ^= detail::mod(absx, absy, &q);
+     return *quo = qsign ? -q : q, half(detail::binary, value);
+ }
+
+ /// Fused multiply add.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
+ /// \param x first operand
+ /// \param y second operand
+ /// \param z third operand
+ /// \return ( \a x * \a y ) + \a z rounded as one operation.
+ /// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+ inline half fma(half x, half y, half z)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_);
+ #if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA
+     return half(detail::binary, detail::float2half(std::fma(fx, fy, fz)));
+ #else
+     return half(detail::binary, detail::float2half(fx*fy+fz));
+ #endif
+ #else
+     int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
+     unsigned int sign = (x.data_^y.data_) & 0x8000;
+     bool sub = ((sign^z.data_)&0x8000) != 0;
+     if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+         return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
+             (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
+             (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
+     if(!absx || !absy)
+         return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
+     for(; absx<0x400; absx<<=1,--exp) ;
+     for(; absy<0x400; absy<<=1,--exp) ;
+     detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+     int i = m >> 21;
+     exp += (absx>>10) + (absy>>10) + i;
+     m <<= 3 - i;
+     if(absz)
+     {
+         int expz = 0;
+         for(; absz<0x400; absz<<=1,--expz) ;
+         expz += absz >> 10;
+         detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
+         if(expz > exp || (expz == exp && mz > m))
+         {
+             std::swap(m, mz);
+             std::swap(exp, expz);
+             if(sub)
+                 sign = z.data_ & 0x8000;
+         }
+         int d = exp - expz;
+         mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+         if(sub)
+         {
+             m -= mz;
+             if(!m)
+                 return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+             for(; m<0x800000; m<<=1,--exp) ;
+         }
+         else
+         {
+             m += mz;
+             i = m >> 24;
+             m = (m>>i) | (m&i);
+             exp += i;
+         }
+     }
+     if(exp > 30)
+         return half(detail::binary, detail::overflow(sign));
+     else if(exp < -10)
+         return half(detail::binary, detail::underflow(sign));
+     return half(detail::binary, detail::fixed2half(m, exp-1, sign));
+ #endif
+ }
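+
+ // Illustrative sketch (not upstream code): fma rounds only once, which can
+ // differ from the separately rounded x*y + z. With binary16 inputs:
+ //
+ //     half x(0.1f), y(10.0f), z(-1.0f);
+ //     half fused = fma(x, y, z);   // ~ -0.000244 (x*y is slightly below 1)
+ //     half naive = x * y + z;      // 0.0, because x*y rounds up to 1.0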
+
+ /// Maximum of half expressions.
+ /// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
+ /// \param x first operand
+ /// \param y second operand
+ /// \return maximum of operands, ignoring quiet NaNs
+ /// \exception FE_INVALID if \a x or \a y is signaling NaN
+ inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
+ {
+     return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
+         (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+ }
+
+ /// Minimum of half expressions.
+ /// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
+ /// \param x first operand
+ /// \param y second operand
+ /// \return minimum of operands, ignoring quiet NaNs
+ /// \exception FE_INVALID if \a x or \a y is signaling NaN
+ inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
+ {
+     return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
+         (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+ }
+
+ /// Positive difference.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
+ /// \param x first operand
+ /// \param y second operand
+ /// \return \a x - \a y or 0 if difference negative
+ /// \exception FE_... according to operator-(half,half)
+ inline half fdim(half x, half y)
+ {
+     if(isnan(x) || isnan(y))
+         return half(detail::binary, detail::signal(x.data_, y.data_));
+     return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
+ }
+
+ /// Get NaN value.
+ /// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
+ /// \param arg string code
+ /// \return quiet NaN
+ inline half nanh(const char *arg)
+ {
+     unsigned int value = 0x7FFF;
+     while(*arg)
+         value ^= static_cast<unsigned>(*arg++) & 0xFF;
+     return half(detail::binary, value);
+ }
+
+ /// \}
+ /// \anchor exponential
+ /// \name Exponential functions
+ /// \{
+
+ /// Exponential function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
+ /// \param arg function argument
+ /// \return e raised to \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half exp(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::exp(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp;
+     if(!abs)
+         return half(detail::binary, 0x3C00);
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+     if(abs >= 0x4C80)
+         return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow());
+     detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+     if(e < 14)
+     {
+         exp = 0;
+         m >>= 14 - e;
+     }
+     else
+     {
+         exp = m >> (45-e);
+         m = (m<<(e-14)) & 0x7FFFFFFF;
+     }
+     return half(detail::binary, detail::exp2_post(m, exp, (arg.data_&0x8000)!=0, 0, 26));
+ #endif
+ }
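+
+ // Illustrative sketch (not upstream code): the half range is narrow, so exp
+ // saturates early (exp(12) > 65504, the largest finite half):
+ //
+ //     half e1 = exp(half(1.0f));    // ~2.718
+ //     half e2 = exp(half(12.0f));   // +infinity, raises FE_OVERFLOW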
+
+ /// Binary exponential.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
+ /// \param arg function argument
+ /// \return 2 raised to \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half exp2(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::exp2(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
+     if(!abs)
+         return half(detail::binary, 0x3C00);
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+     if(abs >= 0x4E40)
+         return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow());
+     return half(detail::binary, detail::exp2_post(
+         (static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28));
+ #endif
+ }
+
+ /// Exponential minus one.
+ /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+ /// and in <1% of inputs for any other rounding mode.
+ ///
+ /// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
+ /// \param arg function argument
+ /// \return e raised to \a arg, minus 1
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half expm1(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::expm1(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp;
+     if(!abs)
+         return arg;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
+     if(abs >= 0x4A00)
+         return half(detail::binary, (arg.data_&0x8000) ? detail::rounded(0xBBFF, 1, 1) : detail::overflow());
+     detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+     if(e < 14)
+     {
+         exp = 0;
+         m >>= 14 - e;
+     }
+     else
+     {
+         exp = m >> (45-e);
+         m = (m<<(e-14)) & 0x7FFFFFFF;
+     }
+     m = detail::exp2(m);
+     if(sign)
+     {
+         int s = 0;
+         if(m > 0x80000000)
+         {
+             ++exp;
+             m = detail::divide64(0x80000000, m, s);
+         }
+         m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
+         exp = 0;
+     }
+     else
+         m -= (exp<31) ? (0x80000000>>exp) : 1;
+     for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
+     if(exp > 29)
+         return half(detail::binary, detail::overflow());
+     return half(detail::binary, detail::rounded(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
+ #endif
+ }
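+
+ // Illustrative sketch (not upstream code): for tiny arguments exp(x) - 1
+ // cancels almost completely in half precision, while expm1 keeps the digits:
+ //
+ //     half x(0.0005f);
+ //     half bad  = exp(x) - half(1.0f);  // nearly all precision lost
+ //     half good = expm1(x);             // ~0.0005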
+
+ /// Natural logarithm.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
+ /// \param arg function argument
+ /// \return logarithm of \a arg to base e
+ /// \exception FE_INVALID for signaling NaN or negative argument
+ /// \exception FE_DIVBYZERO for 0
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half log(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::log(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = -15;
+     if(!abs)
+         return half(detail::binary, detail::pole(0x8000));
+     if(arg.data_ & 0x8000)
+         return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs >= 0x7C00)
+         return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+     for(; abs<0x400; abs<<=1,--exp) ;
+     exp += abs >> 10;
+     return half(detail::binary, detail::log2_post(
+         detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
+ #endif
+ }
+
+ /// Common logarithm.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
+ /// \param arg function argument
+ /// \return logarithm of \a arg to base 10
+ /// \exception FE_INVALID for signaling NaN or negative argument
+ /// \exception FE_DIVBYZERO for 0
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half log10(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::log10(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = -15;
+     if(!abs)
+         return half(detail::binary, detail::pole(0x8000));
+     if(arg.data_ & 0x8000)
+         return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs >= 0x7C00)
+         return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+     switch(abs)
+     {
+         case 0x4900: return half(detail::binary, 0x3C00);
+         case 0x5640: return half(detail::binary, 0x4000);
+         case 0x63D0: return half(detail::binary, 0x4200);
+         case 0x70E2: return half(detail::binary, 0x4400);
+     }
+     for(; abs<0x400; abs<<=1,--exp) ;
+     exp += abs >> 10;
+     return half(detail::binary, detail::log2_post(
+         detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
+ #endif
+ }
+
+ /// Binary logarithm.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
+ /// \param arg function argument
+ /// \return logarithm of \a arg to base 2
+ /// \exception FE_INVALID for signaling NaN or negative argument
+ /// \exception FE_DIVBYZERO for 0
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half log2(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::log2(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
+     if(!abs)
+         return half(detail::binary, detail::pole(0x8000));
+     if(arg.data_ & 0x8000)
+         return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs >= 0x7C00)
+         return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+     if(abs == 0x3C00)
+         return half(detail::binary, 0);
+     for(; abs<0x400; abs<<=1,--exp) ;
+     exp += (abs>>10);
+     if(!(abs&0x3FF))
+     {
+         unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
+         for(exp=18; m<0x400; m<<=1,--exp) ;
+         return half(detail::binary, value+(exp<<10)+m);
+     }
+     detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m =
+         (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
+     if(!m)
+         return half(detail::binary, 0);
+     for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
+     for(; m>0xFFFFFFF; m>>=1,++exp)
+         s |= m & 1;
+     return half(detail::binary, detail::fixed2half(m, exp, sign&0x8000, s));
+ #endif
+ }
+
+ /// Natural logarithm plus one.
+ /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+ /// and in ~1% of inputs for any other rounding mode.
+ ///
+ /// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
+ /// \param arg function argument
+ /// \return logarithm of \a arg plus 1 to base e
+ /// \exception FE_INVALID for signaling NaN or argument <-1
+ /// \exception FE_DIVBYZERO for -1
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half log1p(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::log1p(detail::half2float(arg.data_))));
+ #else
+     if(arg.data_ >= 0xBC00)
+         return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+     int abs = arg.data_ & 0x7FFF, exp = -15;
+     if(!abs || abs >= 0x7C00)
+         return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+     for(; abs<0x400; abs<<=1,--exp) ;
+     exp += abs >> 10;
+     detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
+     if(arg.data_ & 0x8000)
+     {
+         m = 0x40000000 - (m>>-exp);
+         for(exp=0; m<0x40000000; m<<=1,--exp) ;
+     }
+     else
+     {
+         if(exp < 0)
+         {
+             m = 0x40000000 + (m>>-exp);
+             exp = 0;
+         }
+         else
+         {
+             m += 0x40000000 >> exp;
+             int i = m >> 31;
+             m >>= i;
+             exp += i;
+         }
+     }
+     return half(detail::binary, detail::log2_post(detail::log2(m), exp, 17));
+ #endif
+ }
+
+ /// \}
+ /// \anchor power
+ /// \name Power functions
+ /// \{
+
+ /// Square root.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
+ /// \param arg function argument
+ /// \return square root of \a arg
+ /// \exception FE_INVALID for signaling NaN and negative arguments
+ /// \exception FE_INEXACT according to rounding
+ inline half sqrt(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::sqrt(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = 15;
+     if(!abs || arg.data_ >= 0x7C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
+     for(; abs<0x400; abs<<=1,--exp) ;
+     detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
+     return half(detail::binary, detail::rounded((exp<<10)+(m&0x3FF), r>m, r!=0));
+ #endif
+ }
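+
+ // Illustrative sketch (not upstream code): sqrt is correctly rounded, so
+ // exact squares come back exactly:
+ //
+ //     half a = sqrt(half(9.0f));   // exactly 3.0
+ //     half b = sqrt(half(2.0f));   // ~1.4141, one rounding of sqrt(2)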
+
+ /// Inverse square root.
+ /// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing
+ /// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
+ /// \param arg function argument
+ /// \return reciprocal of square root of \a arg
+ /// \exception FE_INVALID for signaling NaN and negative arguments
+ /// \exception FE_INEXACT according to rounding
+ inline half rsqrt(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(detail::internal_t(1)/std::sqrt(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000;
+     if(!abs || arg.data_ >= 0x7C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ?
+             detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0);
+     for(; abs<0x400; abs<<=1,bias-=0x400) ;
+     unsigned int frac = (abs+=bias) & 0x7FF;
+     if(frac == 0x400)
+         return half(detail::binary, 0x7A00-(abs>>1));
+     if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) ||
+         (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B)))
+         return pow(arg, half(detail::binary, 0xB800));
+     detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my;
+     int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21;
+     for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+     i = (my*=mz>>10) >> 31;
+     expy += i;
+     my = (my>>(20+i)) + 1;
+     i = (mz=my*my) >> 21;
+     for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+     i = (my*=(mz>>10)+1) >> 31;
+     return half(detail::binary, detail::fixed2half(my>>i, expy+i+14));
+ #endif
+ }
+
+ /// Cubic root.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
+ /// \param arg function argument
+ /// \return cubic root of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_INEXACT according to rounding
+ inline half cbrt(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::cbrt(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = -15;
+     if(!abs || abs == 0x3C00 || abs >= 0x7C00)
+         return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+     for(; abs<0x400; abs<<=1, --exp);
+     detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m =
+         (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
+     for(exp=2; m<0x80000000; m<<=1,--exp) ;
+     m = detail::multiply64(m, 0xAAAAAAAB);
+     int i = m >> 31, s;
+     exp += i;
+     m <<= 1 - i;
+     if(exp < 0)
+     {
+         f = m >> -exp;
+         exp = 0;
+     }
+     else
+     {
+         f = (m<<exp) & 0x7FFFFFFF;
+         exp = m >> (31-exp);
+     }
+     m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
+     if(sign)
+     {
+         if(m > 0x80000000)
+         {
+             m = detail::divide64(0x80000000, m, s);
+             ++exp;
+         }
+         exp = -exp;
+     }
+     return half(detail::binary, (half::round_style==std::round_to_nearest) ?
+         detail::fixed2half(m, exp+14, arg.data_&0x8000) :
+         detail::fixed2half((m+0x80)>>8, exp+14, arg.data_&0x8000));
+ #endif
+ }
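+
+ // Illustrative sketch (not upstream code): rsqrt rounds once, while dividing
+ // by sqrt rounds twice and can be 1 ULP off:
+ //
+ //     half d(0.5f);
+ //     half fast = rsqrt(d);               // ~1.4141, single rounding
+ //     half slow = half(1.0f) / sqrt(d);   // two roundings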
+
+ /// Hypotenuse function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+ /// \param x first argument
+ /// \param y second argument
+ /// \return square root of sum of squares without internal over- or underflows
+ /// \exception FE_INVALID if \a x or \a y is signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+ inline half hypot(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_);
+ #if HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::hypot(fx, fy)));
+ #else
+     return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy)));
+ #endif
+ #else
+     int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
+     if(absx >= 0x7C00 || absy >= 0x7C00)
+         return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) :
+             (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
+     if(!absx)
+         return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
+     if(!absy)
+         return half(detail::binary, detail::check_underflow(absx));
+     if(absy > absx)
+         std::swap(absx, absy);
+     for(; absx<0x400; absx<<=1,--expx) ;
+     for(; absy<0x400; absy<<=1,--expy) ;
+     detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+     mx *= mx;
+     my *= my;
+     int ix = mx >> 21, iy = my >> 21;
+     expx = 2*(expx+(absx>>10)) - 15 + ix;
+     expy = 2*(expy+(absy>>10)) - 15 + iy;
+     mx <<= 10 - ix;
+     my <<= 10 - iy;
+     int d = expx - expy;
+     my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+     return half(detail::binary, detail::hypot_post(mx+my, expx));
+ #endif
+ }
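+
+ // Illustrative sketch (not upstream code): hypot rescales internally, so it
+ // survives squares that would overflow half on their own:
+ //
+ //     half x(300.0f), y(400.0f);
+ //     half h = hypot(x, y);          // 500.0
+ //     half n = sqrt(x*x + y*y);      // overflows: 300*300 > 65504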
+
+ /// Hypotenuse function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+ /// \param x first argument
+ /// \param y second argument
+ /// \param z third argument
+ /// \return square root of sum of squares without internal over- or underflows
+ /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+ inline half hypot(half x, half y, half z)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_);
+     return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy+fz*fz)));
+ #else
+     int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
+     if(!absx)
+         return hypot(y, z);
+     if(!absy)
+         return hypot(x, z);
+     if(!absz)
+         return hypot(x, y);
+     if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+         return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
+             (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
+             (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
+             detail::signal(x.data_, y.data_, z.data_));
+     if(absz > absy)
+         std::swap(absy, absz);
+     if(absy > absx)
+         std::swap(absx, absy);
+     if(absz > absy)
+         std::swap(absy, absz);
+     for(; absx<0x400; absx<<=1,--expx) ;
+     for(; absy<0x400; absy<<=1,--expy) ;
+     for(; absz<0x400; absz<<=1,--expz) ;
+     detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
+     mx *= mx;
+     my *= my;
+     mz *= mz;
+     int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
+     expx = 2*(expx+(absx>>10)) - 15 + ix;
+     expy = 2*(expy+(absy>>10)) - 15 + iy;
+     expz = 2*(expz+(absz>>10)) - 15 + iz;
+     mx <<= 10 - ix;
+     my <<= 10 - iy;
+     mz <<= 10 - iz;
+     int d = expy - expz;
+     mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+     my += mz;
+     if(my & 0x80000000)
+     {
+         my = (my>>1) | (my&1);
+         if(++expy > expx)
+         {
+             std::swap(mx, my);
+             std::swap(expx, expy);
+         }
+     }
+     d = expx - expy;
+     my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+     return half(detail::binary, detail::hypot_post(mx+my, expx));
+ #endif
+ }
+
+ /// Power function.
+ /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
+ ///
+ /// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
+ /// \param x base
+ /// \param y exponent
+ /// \return \a x raised to \a y
+ /// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite and negative and \a y is finite and not integral
+ /// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half pow(half x, half y)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::pow(detail::half2float(x.data_), detail::half2float(y.data_))));
+ #else
+     int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
+     if(!absy || x.data_ == 0x3C00)
+         return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
+     bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
+     unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
+     if(absx >= 0x7C00 || absy >= 0x7C00)
+         return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+             (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
+             (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
+     if(!absx)
+         return half(detail::binary, (y.data_&0x8000) ?
+         detail::pole(sign) : sign);
+     if((x.data_&0x8000) && !is_int)
+         return half(detail::binary, detail::invalid());
+     if(x.data_ == 0xBC00)
+         return half(detail::binary, sign|0x3C00);
+     switch(y.data_)
+     {
+         case 0x3800: return sqrt(x);
+         case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+         case 0x4000: return x * x;
+         case 0xBC00: return half(detail::binary, 0x3C00) / x;
+     }
+     for(; absx<0x400; absx<<=1,--exp) ;
+     detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m =
+         (((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
+     for(exp=-11; m<0x80000000; m<<=1,--exp) ;
+     for(; absy<0x400; absy<<=1,--exp) ;
+     m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
+     int i = m >> 31;
+     exp += (absy>>10) + i;
+     m <<= 1 - i;
+     if(exp < 0)
+     {
+         f = m >> -exp;
+         exp = 0;
+     }
+     else
+     {
+         f = (m<<exp) & 0x7FFFFFFF;
+         exp = m >> (31-exp);
+     }
+     return half(detail::binary, detail::exp2_post(f, exp, ((msign&1)^(y.data_>>15))!=0, sign));
+ #endif
+ }
+
+ /// \}
+ /// \anchor trigonometric
+ /// \name Trigonometric functions
+ /// \{
+
+ /// Compute sine and cosine simultaneously.
+ /// This returns the same results as sin() and cos() but is faster than calling each function individually.
+ ///
+ /// This function is exact to rounding for all rounding modes.
+ /// \param arg function argument
+ /// \param sin variable to take sine of \a arg
+ /// \param cos variable to take cosine of \a arg
+ /// \exception FE_INVALID for signaling NaN or infinity
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline void sincos(half arg, half *sin, half *cos)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     detail::internal_t f = detail::half2float(arg.data_);
+     *sin = half(detail::binary, detail::float2half(std::sin(f)));
+     *cos = half(detail::binary, detail::float2half(std::cos(f)));
+ #else
+     int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k;
+     if(abs >= 0x7C00)
+         *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+     else if(!abs)
+     {
+         *sin = arg;
+         *cos = half(detail::binary, 0x3C00);
+     }
+     else if(abs < 0x2500)
+     {
+         *sin = half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+         *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+     }
+     else
+     {
+         if(half::round_style != std::round_to_nearest)
+         {
+             switch(abs)
+             {
+                 case 0x48B7:
+                     *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1));
+                     *cos = half(detail::binary, detail::rounded(0xBBFF, 1, 1));
+                     return;
+                 case 0x598C:
+                     *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1));
+                     *cos = half(detail::binary, detail::rounded(0x80FC, 1, 1));
+                     return;
+                 case 0x6A64:
+                     *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1));
+                     *cos = half(detail::binary, detail::rounded(0x27FF, 1, 1));
+                     return;
+                 case 0x6D8C:
+                     *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1));
+                     *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+                     return;
+             }
+         }
+         std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+         switch(k & 3)
+         {
+             case 1: sc = std::make_pair(sc.second, -sc.first); break;
+             case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+             case 3: sc = std::make_pair(-sc.second, sc.first); break;
+         }
+         *sin = half(detail::binary, detail::fixed2half((sc.first^-static_cast<detail::uint32>(sign))+sign));
+         *cos = half(detail::binary, detail::fixed2half(sc.second));
+     }
+ #endif
+ }
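+
+ // Illustrative sketch (not upstream code): sincos shares one argument
+ // reduction between both results, so it is cheaper than separate calls:
+ //
+ //     half s, c;
+ //     sincos(half(1.0f), &s, &c);   // s ~ 0.8415, c ~ 0.5403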
+
+ /// Sine function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin).
+ /// \param arg function argument
+ /// \return sine value of \a arg
+ /// \exception FE_INVALID for signaling NaN or infinity
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half sin(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::sin(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, k;
+     if(!abs)
+         return arg;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs < 0x2900)
+         return half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+     if(half::round_style != std::round_to_nearest)
+         switch(abs)
+         {
+             case 0x48B7: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1));
+             case 0x6A64: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1));
+             case 0x6D8C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1));
+         }
+     std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+     detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15));
+     return half(detail::binary, detail::fixed2half((((k&1) ? sc.second : sc.first)^sign) - sign));
+ #endif
+ }
+
+ /// Cosine function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos).
+ /// \param arg function argument
+ /// \return cosine value of \a arg
+ /// \exception FE_INVALID for signaling NaN or infinity
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half cos(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::cos(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, k;
+     if(!abs)
+         return half(detail::binary, 0x3C00);
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs < 0x2500)
+         return half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+     if(half::round_style != std::round_to_nearest && abs == 0x598C)
+         return half(detail::binary, detail::rounded(0x80FC, 1, 1));
+     std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+     detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1);
+     return half(detail::binary, detail::fixed2half((((k&1) ? sc.first : sc.second)^sign) - sign));
+ #endif
+ }
+
+ /// Tangent function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan).
+ /// \param arg function argument
+ /// \return tangent value of \a arg
+ /// \exception FE_INVALID for signaling NaN or infinity
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half tan(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::tan(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = 13, k;
+     if(!abs)
+         return arg;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ?
+             detail::invalid() : detail::signal(arg.data_));
+     if(abs < 0x2700)
+         return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+     if(half::round_style != std::round_to_nearest)
+         switch(abs)
+         {
+             case 0x658C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x07E6, 1, 1));
+             case 0x7330: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x4B62, 1, 1));
+         }
+     std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
+     if(k & 1)
+         sc = std::make_pair(-sc.second, sc.first);
+     detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
+     detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx;
+     for(; my<0x80000000; my<<=1,--exp) ;
+     for(; mx<0x80000000; mx<<=1,++exp) ;
+     return half(detail::binary, detail::tangent_post(my, mx, exp, (signy^signx^arg.data_)&0x8000));
+ #endif
+ }
+
+ /// Arc sine.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin).
+ /// \param arg function argument
+ /// \return arc sine value of \a arg
+ /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half asin(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::asin(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+     if(!abs)
+         return arg;
+     if(abs >= 0x3C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+             detail::rounded(sign|0x3E48, 0, 1));
+     if(abs < 0x2900)
+         return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+     if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3))
+         return half(detail::binary, detail::rounded(arg.data_+1, 1, 1));
+     std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs);
+     detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26);
+     return half(detail::binary, detail::fixed2half(m, 14, sign));
+ #endif
+ }
+
+ /// Arc cosine function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos).
+ /// \param arg function argument
+ /// \return arc cosine value of \a arg
+ /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half acos(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::acos(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15;
+     if(!abs)
+         return half(detail::binary, detail::rounded(0x3E48, 0, 1));
+     if(abs >= 0x3C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+             sign ? detail::rounded(0x4248, 0, 1) : 0);
+     std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs);
+     detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
+     return half(detail::binary, detail::fixed2half(sign ? (0xC90FDAA2-m) : m, 15, 0, sign));
+ #endif
+ }
+
+ /// Arc tangent function.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan).
+ /// \param arg function argument + /// \return arc tangent value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half atan(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::rounded(sign|0x3E48, 0, 1) : detail::signal(arg.data_)); + if(abs <= 0x2700) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + int exp = (abs>>10) + (abs<=0x3FF); + detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10); + detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) : + detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28); + return half(detail::binary, detail::fixed2half(m, 14, sign)); + #endif + } + + /// Arc tangent function. + /// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`, + /// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode. + /// + /// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2). + /// \param y numerator + /// \param x denominator + /// \return arc tangent value + /// \exception FE_INVALID if \a x or \a y is signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half atan2(half y, half x) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan2(detail::half2float(y.data_), detail::half2float(x.data_)))); + #else + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + { + if(absx > 0x7C00 || absy > 0x7C00) + return half(detail::binary, detail::signal(x.data_, y.data_)); + if(absy == 0x7C00) + return half(detail::binary, (absx<0x7C00) ? detail::rounded(signy|0x3E48, 0, 1) : + signx ? detail::rounded(signy|0x40B6, 0, 1) : + detail::rounded(signy|0x3A48, 0, 1)); + return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + } + if(!absy) + return signx ? half(detail::binary, detail::rounded(signy|0x4248, 0, 1)) : y; + if(!absx) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF); + if(d > (signx ? 18 : 12)) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + if(signx && d < -11) + return half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) + { + for(; absy<0x400; absy<<=1,--d) ; + detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800; + int i = my < mx; + d -= i; + if(d < -25) + return half(detail::binary, detail::underflow(signy)); + my <<= 11 + i; + return half(detail::binary, detail::fixed2half(my/mx, d+14, signy, my%mx!=0)); + } + detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)), + ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1))); + return half(detail::binary, detail::fixed2half(signx ? 
+         (0xC90FDAA2-m) : m, 15, signy, signx));
+ #endif
+ }
+
+ /// \}
+ /// \anchor hyperbolic
+ /// \name Hyperbolic functions
+ /// \{
+
+ /// Hyperbolic sine.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh).
+ /// \param arg function argument
+ /// \return hyperbolic sine value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half sinh(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::sinh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp;
+     if(!abs || abs >= 0x7C00)
+         return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+     if(abs <= 0x2900)
+         return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+     std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27);
+     detail::uint32 m = mm.first - mm.second;
+     for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ;
+     unsigned int sign = arg.data_ & 0x8000;
+     if(exp > 29)
+         return half(detail::binary, detail::overflow(sign));
+     return half(detail::binary, detail::fixed2half(m, exp, sign));
+ #endif
+ }
+
+ /// Hyperbolic cosine.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh).
+ /// \param arg function argument
+ /// \return hyperbolic cosine value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half cosh(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::cosh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp;
+     if(!abs)
+         return half(detail::binary, 0x3C00);
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00);
+     std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26);
+     detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31;
+     m = (m>>i) | (m&i) | 0x80000000;
+     if((exp+=13+i) > 29)
+         return half(detail::binary, detail::overflow());
+     return half(detail::binary, detail::fixed2half(m, exp));
+ #endif
+ }
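+
+ // Illustrative sketch (not upstream code): cosh grows like exp, so it
+ // overflows the half range already near |arg| = 12:
+ //
+ //     half a = cosh(half(2.0f));    // ~3.762
+ //     half b = cosh(half(12.0f));   // +infinity, raises FE_OVERFLOW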
+
+ /// Hyperbolic tangent.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh).
+ /// \param arg function argument
+ /// \return hyperbolic tangent value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half tanh(half arg)
+ {
+ #ifdef HALF_ARITHMETIC_TYPE
+     return half(detail::binary, detail::float2half(std::tanh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp;
+     if(!abs)
+         return arg;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000));
+     if(abs >= 0x4500)
+         return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1));
+     if(abs < 0x2700)
+         return half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+     if(half::round_style != std::round_to_nearest && abs == 0x2D3F)
+         return half(detail::binary, detail::rounded(arg.data_-3, 0, 1));
+     std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27);
+     detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31;
+     for(exp=13; my<0x80000000; my<<=1,--exp) ;
+     mx = (mx>>i) | 0x80000000;
+     return half(detail::binary, detail::tangent_post(my, mx, exp-i, arg.data_&0x8000));
+ #endif
+ }
+
+ /// Hyperbolic area sine.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh).
+ /// \param arg function argument
+ /// \return area sine value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half asinh(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::asinh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF;
+     if(!abs || abs >= 0x7C00)
+         return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+     if(abs <= 0x2900)
+         return half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+     if(half::round_style != std::round_to_nearest)
+         switch(abs)
+         {
+             case 0x32D4: return half(detail::binary, detail::rounded(arg.data_-13, 1, 1));
+             case 0x3B5B: return half(detail::binary, detail::rounded(arg.data_-197, 1, 1));
+         }
+     return half(detail::binary, detail::area(arg.data_));
+ #endif
+ }
+
+ /// Hyperbolic area cosine.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh).
+ /// \param arg function argument
+ /// \return area cosine value of \a arg
+ /// \exception FE_INVALID for signaling NaN or arguments <1
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half acosh(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::acosh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF;
+     if((arg.data_&0x8000) || abs < 0x3C00)
+         return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs == 0x3C00)
+         return half(detail::binary, 0);
+     if(arg.data_ >= 0x7C00)
+         return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+     return half(detail::binary, detail::area(arg.data_));
+ #endif
+ }
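+
+ // Illustrative sketch (not upstream code): asinh accepts the whole half
+ // range, while acosh is only defined for arguments >= 1:
+ //
+ //     half p = asinh(half(-3.0f));  // ~ -1.818
+ //     half q = acosh(half(0.5f));   // NaN, raises FE_INVALID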
+
+ /// Hyperbolic area tangent.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh).
+ /// \param arg function argument
+ /// \return area tangent value of \a arg
+ /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+ /// \exception FE_DIVBYZERO for +/-1
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half atanh(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::atanh(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF, exp = 0;
+     if(!abs)
+         return arg;
+     if(abs >= 0x3C00)
+         return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+     if(abs < 0x2700)
+         return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+     detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m;
+     for(; mx<0x80000000; mx<<=1,++exp) ;
+     int i = my >= mx, s;
+     return half(detail::binary, detail::log2_post(detail::log2(
+         (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000));
+ #endif
+ }
+
+ /// \}
+ /// \anchor special
+ /// \name Error and gamma functions
+ /// \{
+
+ /// Error function.
+ /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+ ///
+ /// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf).
+ /// \param arg function argument
+ /// \return error function value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half erf(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::erf(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF;
+     if(!abs || abs >= 0x7C00)
+         return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg;
+     if(abs >= 0x4200)
+         return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1));
+     return half(detail::binary, detail::erf<half::round_style,false>(arg.data_));
+ #endif
+ }
+
+ /// Complementary error function.
+ /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+ ///
+ /// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc).
+ /// \param arg function argument
+ /// \return 1 minus error function value of \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half erfc(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::erfc(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_));
+     if(!abs)
+         return half(detail::binary, 0x3C00);
+     if(abs >= 0x4400)
+         return half(detail::binary, detail::rounded((sign>>1)-(sign>>15), sign>>15, 1));
+     return half(detail::binary, detail::erf<half::round_style,true>(arg.data_));
+ #endif
+ }
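+
+ // Illustrative sketch (not upstream code): erfc keeps precision in the tail,
+ // where computing 1 - erf(x) only recovers the half grid spacing near 1:
+ //
+ //     half t = erfc(half(2.5f));              // ~0.000407
+ //     half u = half(1.0f) - erf(half(2.5f));  // ~0.000488 (grid spacing)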
+
+ /// Natural logarithm of gamma function.
+ /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs.
+ ///
+ /// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma).
+ /// \param arg function argument
+ /// \return natural logarithm of gamma function for \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_DIVBYZERO for 0 or negative integer arguments
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half lgamma(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::lgamma(detail::half2float(arg.data_))));
+ #else
+     int abs = arg.data_ & 0x7FFF;
+     if(abs >= 0x7C00)
+         return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
+     if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
+         return half(detail::binary, detail::pole());
+     if(arg.data_ == 0x3C00 || arg.data_ == 0x4000)
+         return half(detail::binary, 0);
+     return half(detail::binary, detail::gamma<half::round_style,true>(arg.data_));
+ #endif
+ }
+
+ /// Gamma function.
+ /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs.
+ ///
+ /// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma).
+ /// \param arg function argument
+ /// \return gamma function value of \a arg
+ /// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments
+ /// \exception FE_DIVBYZERO for 0
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half tgamma(half arg)
+ {
+ #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+     return half(detail::binary, detail::float2half(std::tgamma(detail::half2float(arg.data_))));
+ #else
+     unsigned int abs = arg.data_ & 0x7FFF;
+     if(!abs)
+         return half(detail::binary, detail::pole(arg.data_));
+     if(abs >= 0x7C00)
+         return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+     if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
+         return half(detail::binary, detail::invalid());
+     if(arg.data_ >= 0xCA80)
+         return half(detail::binary, detail::underflow((1-((abs>>(25-(abs>>10)))&1))<<15));
+     if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000))
+         return half(detail::binary, detail::overflow());
+     if(arg.data_ == 0x3C00)
+         return arg;
+     return half(detail::binary, detail::gamma<half::round_style,false>(arg.data_));
+ #endif
+ }
+
+ /// \}
+ /// \anchor rounding
+ /// \name Rounding
+ /// \{
+
+ /// Nearest integer not less than half value.
+ /// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil).
+ /// \param arg half to round
+ /// \return nearest integer not less than \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_INEXACT if value had to be rounded
+ inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); }
+
+ /// Nearest integer not greater than half value.
+ /// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor).
+ /// \param arg half to round
+ /// \return nearest integer not greater than \a arg
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_INEXACT if value had to be rounded
+ inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); }
+
+ /// Nearest integer not greater in magnitude than half value.
+ /// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc). + /// \param arg half to round + /// \return nearest integer not greater in magnitude than \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer. + /// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round). + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer. + /// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round). + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID if value is not representable as `long` + inline long lround(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID if value is not representable as `long` + /// \exception FE_INEXACT if value had to be rounded + inline long lrint(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID for signaling NaN + inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } +#if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer. + /// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round). + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID if value is not representable as `long long` + inline long long llround(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID if value is not representable as `long long` + /// \exception FE_INEXACT if value had to be rounded + inline long long llrint(half arg) { return detail::half2int(arg.data_); } +#endif + + /// \} + /// \anchor float + /// \name Floating point manipulation + /// \{ + + /// Decompress floating-point number. 
+ /// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp).
+ /// \param arg number to decompress
+ /// \param exp address to store exponent at
+ /// \return significand in range [0.5, 1)
+ /// \exception FE_INVALID for signaling NaN
+ inline half frexp(half arg, int *exp)
+ {
+   *exp = 0;
+   unsigned int abs = arg.data_ & 0x7FFF;
+   if(abs >= 0x7C00 || !abs)
+     return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+   for(; abs<0x400; abs<<=1,--*exp) ;
+   *exp += (abs>>10) - 14;
+   return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF));
+ }
+
+ /// Multiply by power of two.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+ /// \param arg number to modify
+ /// \param exp power of two to multiply with
+ /// \return \a arg multiplied by 2 raised to \a exp
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half scalbln(half arg, long exp)
+ {
+   unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+   if(abs >= 0x7C00 || !abs)
+     return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+   for(; abs<0x400; abs<<=1,--exp) ;
+   exp += abs >> 10;
+   if(exp > 30)
+     return half(detail::binary, detail::overflow(sign));
+   else if(exp < -10)
+     return half(detail::binary, detail::underflow(sign));
+   else if(exp > 0)
+     return half(detail::binary, sign|(exp<<10)|(abs&0x3FF));
+   unsigned int m = (abs&0x3FF) | 0x400;
+   return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
+ }
+
+ /// Multiply by power of two.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+ /// \param arg number to modify
+ /// \param exp power of two to multiply with
+ /// \return \a arg multiplied by 2 raised to \a exp
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
+
+ /// Multiply by power of two.
+ /// This function is exact to rounding for all rounding modes.
+ ///
+ /// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp).
+ /// \param arg number to modify
+ /// \param exp power of two to multiply with
+ /// \return \a arg multiplied by 2 raised to \a exp
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
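+
+ // Usage sketch (illustrative, not part of the library): frexp and ldexp
+ // round-trip a finite value exactly; e.g. 6.0 splits into significand 0.75
+ // and exponent 3:
+ //   int e; half m = frexp(half(6.0f), &e); // m == 0.75, e == 3
+ //   half x = ldexp(m, e);                  // x == 6.0 again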
+ /// Extract integer and fractional parts.
+ /// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf).
+ /// \param arg number to decompress
+ /// \param iptr address to store integer part at
+ /// \return fractional part
+ /// \exception FE_INVALID for signaling NaN
+ inline half modf(half arg, half *iptr)
+ {
+   unsigned int abs = arg.data_ & 0x7FFF;
+   if(abs > 0x7C00)
+   {
+     arg = half(detail::binary, detail::signal(arg.data_));
+     return *iptr = arg, arg;
+   }
+   if(abs >= 0x6400)
+     return *iptr = arg, half(detail::binary, arg.data_&0x8000);
+   if(abs < 0x3C00)
+     return iptr->data_ = arg.data_ & 0x8000, arg;
+   unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask;
+   iptr->data_ = arg.data_ & ~mask;
+   if(!m)
+     return half(detail::binary, arg.data_&0x8000);
+   for(; m<0x400; m<<=1,--exp) ;
+   return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
+ }
+
+ /// Extract exponent.
+ /// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb).
+ /// \param arg number to query
+ /// \return floating-point exponent
+ /// \retval FP_ILOGB0 for zero
+ /// \retval FP_ILOGBNAN for NaN
+ /// \retval INT_MAX for infinity
+ /// \exception FE_INVALID for 0 or infinite values
+ inline int ilogb(half arg)
+ {
+   int abs = arg.data_ & 0x7FFF, exp;
+   if(!abs || abs >= 0x7C00)
+   {
+     detail::raise(FE_INVALID);
+     return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN;
+   }
+   for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
+   return exp;
+ }
+
+ /// Extract exponent.
+ /// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb).
+ /// \param arg number to query
+ /// \return floating-point exponent
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_DIVBYZERO for 0
+ inline half logb(half arg)
+ {
+   int abs = arg.data_ & 0x7FFF, exp;
+   if(!abs)
+     return half(detail::binary, detail::pole(0x8000));
+   if(abs >= 0x7C00)
+     return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
+   for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
+   unsigned int value = static_cast<unsigned>(exp<0) << 15;
+   if(exp)
+   {
+     unsigned int m = std::abs(exp) << 6;
+     for(exp=18; m<0x400; m<<=1,--exp) ;
+     value |= (exp<<10) + m;
+   }
+   return half(detail::binary, value);
+ }
+
+ /// Next representable value.
+ /// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter).
+ /// \param from value to compute next representable value for
+ /// \param to direction towards which to compute next value
+ /// \return next representable value after \a from in direction towards \a to
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW for infinite result from finite argument
+ /// \exception FE_UNDERFLOW for subnormal result
+ inline half nextafter(half from, half to)
+ {
+   int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+   if(fabs > 0x7C00 || tabs > 0x7C00)
+     return half(detail::binary, detail::signal(from.data_, to.data_));
+   if(from.data_ == to.data_ || !(fabs|tabs))
+     return to;
+   if(!fabs)
+   {
+     detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+     return half(detail::binary, (to.data_&0x8000)+1);
+   }
+   unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(
+     (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1;
+   detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
+   detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400);
+   return half(detail::binary, out);
+ }
+
+ /// Next representable value.
+ /// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward).
+ /// \param from value to compute next representable value for
+ /// \param to direction towards which to compute next value
+ /// \return next representable value after \a from in direction towards \a to
+ /// \exception FE_INVALID for signaling NaN
+ /// \exception FE_OVERFLOW for infinite result from finite argument
+ /// \exception FE_UNDERFLOW for subnormal result
+ inline half nexttoward(half from, long double to)
+ {
+   int fabs = from.data_ & 0x7FFF;
+   if(fabs > 0x7C00)
+     return half(detail::binary, detail::signal(from.data_));
+   long double lfrom = static_cast<long double>(from);
+   if(detail::builtin_isnan(to) || lfrom == to)
+     return half(static_cast<float>(to));
+   if(!fabs)
+   {
+     detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+     return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1);
+   }
+   unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1;
+   detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
+   detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400);
+   return half(detail::binary, out);
+ }
+
+ /// Take sign.
+ /// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign).
+ /// \param x value to change sign for
+ /// \param y value to take sign from
+ /// \return value equal to \a x in magnitude and to \a y in sign
+ inline HALF_CONSTEXPR half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); }
+
+ /// \}
+ /// \anchor classification
+ /// \name Floating point classification
+ /// \{
+
+ /// Classify floating-point value.
+ /// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify).
+ /// \param arg number to classify
+ /// \retval FP_ZERO for positive and negative zero
+ /// \retval FP_SUBNORMAL for subnormal numbers
+ /// \retval FP_INFINITE for positive and negative infinity
+ /// \retval FP_NAN for NaNs
+ /// \retval FP_NORMAL for all other (normal) values
+ inline HALF_CONSTEXPR int fpclassify(half arg)
+ {
+   return !(arg.data_&0x7FFF) ? FP_ZERO :
+     ((arg.data_&0x7FFF)<0x400) ? FP_SUBNORMAL :
+     ((arg.data_&0x7FFF)<0x7C00) ? FP_NORMAL :
+     ((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE :
+     FP_NAN;
+ }
+
+ /// Check if finite number.
+ /// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite).
+ /// \param arg number to check
+ /// \retval true if neither infinity nor NaN
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
+
+ /// Check for infinity.
+ /// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf).
+ /// \param arg number to check
+ /// \retval true for positive or negative infinity
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
+
+ /// Check for NaN.
+ /// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan).
+ /// \param arg number to check
+ /// \retval true for NaNs
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
+
+ /// Check if normal number.
+ /// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal).
+ /// \param arg number to check
+ /// \retval true if normal number
+ /// \retval false if either subnormal, zero, infinity or NaN
+ inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
+
+ /// Check sign.
+ /// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit).
+ /// \param arg number to check
+ /// \retval true for negative number
+ /// \retval false for positive number
+ inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
+
+ /// \}
+ /// \anchor compfunc
+ /// \name Comparison
+ /// \{
+
+ /// Quiet comparison for greater than.
+ /// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater than \a y
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isgreater(half x, half y)
+ {
+   return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+ }
+
+ /// Quiet comparison for greater equal.
+ /// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater equal \a y
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isgreaterequal(half x, half y)
+ {
+   return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+ }
+
+ /// Quiet comparison for less than.
+ /// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less than \a y
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isless(half x, half y)
+ {
+   return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+ }
+
+ /// Quiet comparison for less equal.
+ /// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less equal \a y
+ /// \retval false else
+ inline HALF_CONSTEXPR bool islessequal(half x, half y)
+ {
+   return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+ }
+
+ /// Quiet comparison for less or greater.
+ /// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if either less or greater
+ /// \retval false else
+ inline HALF_CONSTEXPR bool islessgreater(half x, half y)
+ {
+   return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y);
+ }
+
+ /// Quiet check if unordered.
+ /// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered).
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if unordered (one or two NaN operands)
+ /// \retval false else
+ inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
+
+ /// \}
+ /// \anchor casting
+ /// \name Casting
+ /// \{
+
+ /// Cast to or from half-precision floating-point number.
+ /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
+ /// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+ ///
+ /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
+ /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
+ /// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
+ /// \tparam T destination type (half or built-in arithmetic type)
+ /// \tparam U source type (half or built-in arithmetic type)
+ /// \param arg value to cast
+ /// \return \a arg converted to destination type
+ /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ template<typename T,typename U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
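+
+ // Usage sketch (illustrative, not part of the library): half_cast converts
+ // directly between half and built-in arithmetic types, optionally with an
+ // explicit rounding mode via the second overload below:
+ //   half h = half_cast<half>(4.2);                   // double -> half
+ //   int i = half_cast<int,std::round_to_nearest>(h); // half -> int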
+ /// Cast to or from half-precision floating-point number.
+ /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
+ /// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+ ///
+ /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
+ /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
+ /// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
+ /// \tparam T destination type (half or built-in arithmetic type)
+ /// \tparam R rounding mode to use
+ /// \tparam U source type (half or built-in arithmetic type)
+ /// \param arg value to cast
+ /// \return \a arg converted to destination type
+ /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+ /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+ template<typename T,std::float_round_style R,typename U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
+ /// \}
+
+ /// \}
+ /// \anchor errors
+ /// \name Error handling
+ /// \{
+
+ /// Clear exception flags.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ ///
+ /// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept).
+ /// \param excepts OR of exceptions to clear
+ /// \retval 0 all selected flags cleared successfully
+ inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
+
+ /// Test exception flags.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ ///
+ /// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept).
+ /// \param excepts OR of exceptions to test
+ /// \return OR of selected exceptions if raised
+ inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
+
+ /// Raise exception flags.
+ /// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as
+ /// configured with the [HALF_ERRHANDLING_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ ///
+ /// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept).
+ /// \param excepts OR of exceptions to raise
+ /// \retval 0 all selected exceptions raised successfully
+ inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
+
+ /// Save exception flags.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ ///
+ /// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+ /// \param flagp address to store flag state at
+ /// \param excepts OR of flags to save
+ /// \retval 0 for success
+ inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
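+
+ // Usage sketch (illustrative, not part of the library): the flag API mirrors
+ // <cfenv>, so callers can poll and rethrow half-precision exceptions:
+ //   feclearexcept(FE_ALL_EXCEPT);
+ //   half h = half(65504.0f) * half(2.0f); // overflows half's range
+ //   if(fetestexcept(FE_OVERFLOW))
+ //     fethrowexcept(FE_OVERFLOW, "half-precision overflow");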
+ /// Restore exception flags.
+ /// This only copies the specified exception state (including unset flags) without incurring any additional exception handling.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ ///
+ /// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+ /// \param flagp address to take flag state from
+ /// \param excepts OR of flags to restore
+ /// \retval 0 for success
+ inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; }
+
+ /// Throw C++ exceptions based on set exception flags.
+ /// This function manually throws a corresponding C++ exception if one of the specified flags is set,
+ /// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not.
+ /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
+ /// but in that case manual flag management is the only way to raise flags.
+ /// \param excepts OR of exceptions to test
+ /// \param msg error message to use for exception description
+ /// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set
+ /// \throw std::overflow_error if `FE_OVERFLOW` is selected and set
+ /// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set
+ /// \throw std::range_error if `FE_INEXACT` is selected and set
+ inline void fethrowexcept(int excepts, const char *msg = "")
+ {
+   excepts &= detail::errflags();
+   if(excepts & (FE_INVALID|FE_DIVBYZERO))
+     throw std::domain_error(msg);
+   if(excepts & FE_OVERFLOW)
+     throw std::overflow_error(msg);
+   if(excepts & FE_UNDERFLOW)
+     throw std::underflow_error(msg);
+   if(excepts & FE_INEXACT)
+     throw std::range_error(msg);
+ }
+ /// \}
+}
+
+
+#undef HALF_UNUSED_NOERR
+#undef HALF_CONSTEXPR
+#undef HALF_CONSTEXPR_CONST
+#undef HALF_CONSTEXPR_NOERR
+#undef HALF_NOEXCEPT
+#undef HALF_NOTHROW
+#undef HALF_THREAD_LOCAL
+#undef HALF_TWOS_COMPLEMENT_INT
+#ifdef HALF_POP_WARNINGS
+	#pragma warning(pop)
+	#undef HALF_POP_WARNINGS
+#endif
+
+#endif
diff --git a/external_libs/runtime/CMakeLists.txt b/external_libs/runtime/CMakeLists.txt
new file mode 100644
index 000000000..6de7dcb59
--- /dev/null
+++ b/external_libs/runtime/CMakeLists.txt
@@ -0,0 +1,16 @@
+## Copyright (c) ByteDance Inc. All rights reserved.
+## Licensed under the Apache License, Version 2.0
+
+# Minimum CMake required
+cmake_minimum_required(VERSION 3.18)
+set(CMAKE_CXX_STANDARD 17)
+
+project(brt-libs LANGUAGES CXX CUDA)
+
+
+set(REPO_ROOT ${PROJECT_SOURCE_DIR})
+message("REPO_ROOT = ${REPO_ROOT}")
+set(CUTLASS_ROOT ${REPO_ROOT}/../external/cutlass)
+message("CUTLASS_ROOT = ${CUTLASS_ROOT}")
+
+add_subdirectory(flash_attn)
diff --git a/external_libs/runtime/README.md b/external_libs/runtime/README.md
new file mode 100644
index 000000000..129e74d9a
--- /dev/null
+++ b/external_libs/runtime/README.md
@@ -0,0 +1,14 @@
+# Runtime External Libs
+
+The runtime external library contains standalone kernels that can be used externally, e.g., by the ByteIR runtime.
+
+## Build
+### Linux/Mac
+```bash
+mkdir ./build
+
+# build runtime
+cd build && cmake .. -G Ninja
+
+cmake --build . --target all
+```
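+
+To rebuild only the flash attention shared library after the first configure
+(the target name comes from `flash_attn/lib/CMakeLists.txt`):
+
+```bash
+cmake --build . --target flash_attn
+```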
diff --git a/external_libs/runtime/flash_attn/CMakeLists.txt b/external_libs/runtime/flash_attn/CMakeLists.txt
new file mode 100644
index 000000000..8570eeb13
--- /dev/null
+++ b/external_libs/runtime/flash_attn/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(lib)
\ No newline at end of file
diff --git a/external_libs/runtime/flash_attn/include/flash_api.h b/external_libs/runtime/flash_attn/include/flash_api.h
new file mode 100644
index 000000000..dfcd4fee6
--- /dev/null
+++ b/external_libs/runtime/flash_attn/include/flash_api.h
@@ -0,0 +1,95 @@
+#include "flash.h"
+#include <cuda_runtime.h>
+
+#if defined(_WIN32)
+#ifndef EXPORT_API
+#define EXPORT_API __declspec(dllexport)
+#endif
+#else
+#define EXPORT_API __attribute__((visibility("default")))
+#endif
+
+void print_Qkv_params(Qkv_params &params);
+void print_Flash_fwd_params(Flash_fwd_params &params);
+void print_Flash_bwd_params(Flash_bwd_params &params);
+void run_mha(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr,
+             void *softmax_lse_ptr, void *softmax_ptr, void *rng_state_ptr,
+
+             uint32_t q_batch_stride, uint32_t k_batch_stride,
+             uint32_t v_batch_stride, uint32_t o_batch_stride,
+
+             uint32_t q_row_stride, uint32_t k_row_stride,
+             uint32_t v_row_stride, uint32_t o_row_stride,
+
+             uint32_t q_head_stride, uint32_t k_head_stride,
+             uint32_t v_head_stride, uint32_t o_head_stride,
+
+             uint32_t b, uint32_t h, uint32_t h_k, uint32_t d,
+             uint32_t d_rounded, float softmax_scale,
+
+             uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded,
+             uint32_t seqlen_k_rounded,
+
+             float p_dropout, int window_size_left, int window_size_right,
+             cudaStream_t stream);
+
+void run_mha_bwd(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr,
+                 void *dout_ptr, void *dq_ptr, void *dk_ptr, void *dv_ptr,
+                 void *dq_accum_ptr, void *softmax_lse_ptr,
+                 void *dsoftmax_sum_ptr, void *rng_state_ptr,
+
+                 uint32_t q_batch_stride, uint32_t k_batch_stride,
+                 uint32_t v_batch_stride, uint32_t o_batch_stride,
+
+                 uint32_t q_row_stride, uint32_t k_row_stride,
+                 uint32_t v_row_stride, uint32_t o_row_stride,
+
+                 uint32_t q_head_stride, uint32_t k_head_stride,
+                 uint32_t v_head_stride, uint32_t o_head_stride,
+
+                 uint32_t b, uint32_t h, uint32_t h_k, uint32_t d,
+                 uint32_t d_rounded, float softmax_scale,
+
+                 uint32_t seqlen_q, uint32_t seqlen_k,
+                 uint32_t seqlen_q_rounded, uint32_t seqlen_k_rounded,
+
+                 float p_dropout, int window_size_left, int window_size_right,
+                 cudaStream_t stream);
+
+void run_mha_fwd_with_kvcache(
+    void *q_ptr, void *k_ptr, void *v_ptr, void *knew_ptr, void *vnew_ptr,
+    void *seqlens_k_, void *o_ptr, void *softmax_lse_ptr,
+
+    uint32_t q_batch_stride, uint32_t k_batch_stride, uint32_t v_batch_stride,
+    uint32_t knew_batch_stride, uint32_t vnew_batch_stride,
+    uint32_t o_batch_stride,
+
+    uint32_t q_row_stride, uint32_t k_row_stride, uint32_t v_row_stride,
+    uint32_t knew_row_stride, uint32_t vnew_row_stride, uint32_t o_row_stride,
+
+    uint32_t q_head_stride, uint32_t k_head_stride, uint32_t v_head_stride,
+    uint32_t knew_head_stride, uint32_t vnew_head_stride,
+    uint32_t o_head_stride,
+
+    uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, uint32_t d_rounded,
+    uint32_t seqlen_knew, float softmax_scale,
+
+    uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded,
+    uint32_t seqlen_k_rounded,
+
+    int window_size_left, int window_size_right, cudaStream_t stream);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+EXPORT_API void run_flash_attn_fwd(void **tensors, void *extra_args,
+                                   cudaStream_t stream);
+
+EXPORT_API void run_flash_attn_bwd(void **tensors, void *extra_args,
+                                   cudaStream_t stream);
+
+EXPORT_API void run_flash_attn_kvcache(void **tensors, void *extra_args,
+                                       cudaStream_t stream);
+#ifdef __cplusplus
+}
+#endif
diff --git a/external_libs/runtime/flash_attn/lib/CMakeLists.txt b/external_libs/runtime/flash_attn/lib/CMakeLists.txt
new file mode 100644
index 000000000..650406d2f
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/CMakeLists.txt
@@ -0,0 +1,40 @@
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -std=c++17 --expt-relaxed-constexpr --expt-extended-lambda --use_fast_math \
+    -gencode=arch=compute_80,code=\\\"sm_80,compute_80\\\" \
+    -U__CUDA_NO_HALF_OPERATORS__ \
+    -U__CUDA_NO_HALF_CONVERSIONS__ \
+    -U__CUDA_NO_HALF2_OPERATORS__ \
+    -U__CUDA_NO_BFLOAT16_CONVERSIONS__ \
+    ")
+
+include_directories(${CUTLASS_ROOT}/include)
+include_directories(../include)
+
+add_library(flash_attn SHARED
+  flash_api.cu
+  flash_fwd_hdim32_fp16_sm80.cu
+  flash_fwd_hdim64_fp16_sm80.cu
+  flash_fwd_hdim96_fp16_sm80.cu
+  flash_fwd_hdim128_fp16_sm80.cu
+  flash_fwd_hdim160_fp16_sm80.cu
+  flash_fwd_hdim192_fp16_sm80.cu
+  flash_fwd_hdim224_fp16_sm80.cu
+  flash_fwd_hdim256_fp16_sm80.cu
+  flash_fwd_split_hdim32_fp16_sm80.cu
+  flash_fwd_split_hdim64_fp16_sm80.cu
+  flash_fwd_split_hdim96_fp16_sm80.cu
+  flash_fwd_split_hdim128_fp16_sm80.cu
+  flash_fwd_split_hdim160_fp16_sm80.cu
+  flash_fwd_split_hdim192_fp16_sm80.cu
+  flash_fwd_split_hdim224_fp16_sm80.cu
+  flash_fwd_split_hdim256_fp16_sm80.cu
+  flash_bwd_hdim32_fp16_sm80.cu
+  flash_bwd_hdim64_fp16_sm80.cu
+  flash_bwd_hdim96_fp16_sm80.cu
+  flash_bwd_hdim128_fp16_sm80.cu
+  flash_bwd_hdim160_fp16_sm80.cu
+  flash_bwd_hdim192_fp16_sm80.cu
+  flash_bwd_hdim224_fp16_sm80.cu
+  flash_bwd_hdim256_fp16_sm80.cu
+)
+
+set_target_properties(flash_attn PROPERTIES CUDA_ARCHITECTURES "80")
diff --git a/external_libs/runtime/flash_attn/lib/alibi.h b/external_libs/runtime/flash_attn/lib/alibi.h
new file mode 100644
index 000000000..1afb3687d
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/alibi.h
@@ -0,0 +1,62 @@
+#include <cmath>
+
+#include <cute/tensor.hpp>
+
+#include <cutlass/cutlass.h>
+#include <cutlass/array.h>
+
+#include "utils.h"
+
+namespace flash {
+
+using namespace cute;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <bool Is_causal, typename Engine, typename Layout>
+inline __device__ void apply_alibi(Tensor<Engine, Layout> &tensor,
+                                   const int col_idx_offset_,
+                                   const int max_seqlen_k,
+                                   const int row_idx_offset,
+                                   const int max_seqlen_q,
+                                   const int warp_row_stride,
+                                   const float alibi_slope) {
+  // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N))
+  static_assert(Layout::rank == 2, "Only support 2D Tensor");
+  const int lane_id = threadIdx.x % 32;
+  const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2;
+  if constexpr (Is_causal) { // Simpler, we add the same bias vector to all rows
+    #pragma unroll
+    for (int nj = 0; nj < size<1, 1>(tensor); ++nj) {
+      const int col_idx_base = col_idx_offset + nj * 8;
+      #pragma unroll
+      for (int j = 0; j < size<1, 0>(tensor); ++j) {
+        const int col_idx = col_idx_base + j;
+        #pragma unroll
+        for (int mi = 0; mi < size<0>(tensor); ++mi) {
+          tensor(mi, make_coord(j, nj)) += alibi_slope * col_idx;
+        }
+      }
+    }
+  } else { // Bias depends on both row_idx and col_idx
+    #pragma unroll
+    for (int mi = 0; mi < size<0, 1>(tensor); ++mi) {
+      const int row_idx_base = row_idx_offset + mi * warp_row_stride;
+      #pragma unroll
+      for (int i = 0; i < size<0, 0>(tensor); ++i) {
+        const int row_idx = row_idx_base + i * 8;
+        #pragma unroll
+        for (int nj = 0; nj < size<1, 1>(tensor); ++nj) {
+          const int col_idx_base = col_idx_offset + nj * 8;
+          #pragma unroll
+          for (int j = 0; j < size<1, 0>(tensor); ++j) {
+            const int col_idx = col_idx_base + j;
+            tensor(make_coord(i, mi), make_coord(j, nj)) -= alibi_slope * abs(row_idx + max_seqlen_k - max_seqlen_q - col_idx);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace flash
diff --git a/external_libs/runtime/flash_attn/lib/block_info.h b/external_libs/runtime/flash_attn/lib/block_info.h
new file mode 100644
index 000000000..65435e51a
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/block_info.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ * Copyright (c) 2023, Tri Dao.
+ ******************************************************************************/
+
+#pragma once
+
+namespace flash {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <bool Varlen = true>
+struct BlockInfo {
+
+  template <typename Params>
+  __device__ BlockInfo(const Params &params, const int bidb)
+      : sum_s_q(!Varlen || params.cu_seqlens_q == nullptr ? -1 : params.cu_seqlens_q[bidb])
+      , sum_s_k(!Varlen || params.cu_seqlens_k == nullptr || !params.is_seqlens_k_cumulative ? -1 : params.cu_seqlens_k[bidb])
+      , actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr ? params.seqlen_q : params.cu_seqlens_q[bidb + 1] - sum_s_q)
+      // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] - cu_seqlens_k[bidb].
+      // Otherwise it's cu_seqlens_k[bidb], i.e., we use cu_seqlens_k to store the sequence lengths of K.
+      , seqlen_k_cache(!Varlen || params.cu_seqlens_k == nullptr ? params.seqlen_k : (params.is_seqlens_k_cumulative ? params.cu_seqlens_k[bidb + 1] - sum_s_k : params.cu_seqlens_k[bidb]))
+      , actual_seqlen_k(params.seqused_k ? params.seqused_k[bidb] : seqlen_k_cache + (params.knew_ptr == nullptr ? 0 : params.seqlen_knew))
+  {
+  }
+
+  template <typename index_t>
+  inline __device__ index_t q_offset(const index_t batch_stride, const index_t row_stride, const int bidb) const {
+    return sum_s_q == -1 ? bidb * batch_stride : uint32_t(sum_s_q) * row_stride;
+  }
+
+  template <typename index_t>
+  inline __device__ index_t k_offset(const index_t batch_stride, const index_t row_stride, const int bidb) const {
+    return sum_s_k == -1 ? bidb * batch_stride : uint32_t(sum_s_k) * row_stride;
+  }
+
+  const int sum_s_q;
+  const int sum_s_k;
+  const int actual_seqlen_q;
+  // We have to have seqlen_k_cache declared before actual_seqlen_k, otherwise actual_seqlen_k is set to 0.
+  const int seqlen_k_cache;
+  const int actual_seqlen_k;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+} // namespace flash
diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash.h b/external_libs/runtime/flash_attn/lib/flash.h
similarity index 73%
rename from runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash.h
rename to external_libs/runtime/flash_attn/lib/flash.h
index 1bfdd7f14..8fa4e6ccd 100644
--- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash.h
+++ b/external_libs/runtime/flash_attn/lib/flash.h
@@ -15,9 +15,6 @@
 // #endif
 // #include
-namespace brt {
-namespace cuda {
-namespace kernel {
 constexpr int TOTAL_DIM = 0;
 constexpr int H_DIM = 1;
@@ -56,6 +53,7 @@ struct Flash_fwd_params : public Qkv_params {
   // The O matrix (output).
   void *__restrict__ o_ptr;
+  void *__restrict__ oaccum_ptr;
   // The stride between rows of O.
   index_t o_batch_stride;
@@ -67,9 +65,11 @@
   // The pointer to the softmax sum.
   void *__restrict__ softmax_lse_ptr;
+  void *__restrict__ softmax_lseaccum_ptr;
 
   // The dimensions.
-  int b, seqlen_q, seqlen_k, d, seqlen_q_rounded, seqlen_k_rounded, d_rounded;
+  int b, seqlen_q, seqlen_k, seqlen_knew, d, seqlen_q_rounded, seqlen_k_rounded,
+      d_rounded, rotary_dim;
 
   // The scaling factors for the kernel.
   float scale_softmax;
@@ -79,8 +79,30 @@ struct Flash_fwd_params : public Qkv_params {
   int *__restrict__ cu_seqlens_q;
   int *__restrict__ cu_seqlens_k;
 
+  // If provided, the actual length of each k sequence.
+  int *__restrict__ seqused_k;
+
   int *__restrict__ blockmask;
 
+  // The K_new and V_new matrices.
+  void *__restrict__ knew_ptr;
+  void *__restrict__ vnew_ptr;
+
+  // The stride between rows of the Q, K and V matrices.
+  index_t knew_batch_stride;
+  index_t vnew_batch_stride;
+  index_t knew_row_stride;
+  index_t vnew_row_stride;
+  index_t knew_head_stride;
+  index_t vnew_head_stride;
+
+  // The cos and sin matrices for rotary embedding.
+  void *__restrict__ rotary_cos_ptr;
+  void *__restrict__ rotary_sin_ptr;
+
+  // The indices to index into the KV cache.
+  int *__restrict__ cache_batch_idx;
+
   // The dropout probability (probability of keeping an activation).
   float p_dropout;
   // uint32_t p_dropout_in_uint;
@@ -91,6 +113,9 @@ struct Flash_fwd_params : public Qkv_params {
   float rp_dropout;
   float scale_softmax_rp_dropout;
 
+  // Local window size
+  int window_size_left, window_size_right;
+
   // Random state.
   // at::PhiloxCudaState philox_args;
 
@@ -99,6 +124,18 @@ struct Flash_fwd_params : public Qkv_params {
   bool is_bf16;
   bool is_causal;
+
+  // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] -
+  // cu_seqlens_k[bidb]. Otherwise it's cu_seqlens_k[bidb], i.e., we use
+  // cu_seqlens_k to store the sequence lengths of K.
+  bool is_seqlens_k_cumulative;
+
+  bool is_rotary_interleaved;
+
+  int num_splits; // For split-KV version
+
+  void *__restrict__ alibi_slopes_ptr;
+  index_t alibi_slopes_batch_stride;
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -138,16 +175,19 @@ struct Flash_bwd_params : public Flash_fwd_params {
   // The pointer to the softmax d sum.
   void *__restrict__ dsoftmax_sum;
+
+  bool deterministic;
+  index_t dq_accum_split_stride;
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 template <typename T, int Headdim> void run_mha_fwd_(Flash_fwd_params &params, cudaStream_t stream);
+template <typename T, int Headdim>
+void run_mha_fwd_splitkv_dispatch(Flash_fwd_params &params,
+                                  cudaStream_t stream);
 template <typename T, int Headdim> void run_mha_bwd_(Flash_bwd_params &params, cudaStream_t stream, const bool configure);
-} // namespace kernel
-} // namespace cuda
-} // namespace brt
\ No newline at end of file
diff --git a/external_libs/runtime/flash_attn/lib/flash_api.cu b/external_libs/runtime/flash_attn/lib/flash_api.cu
new file mode 100644
index 000000000..d05b383f1
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_api.cu
@@ -0,0 +1,782 @@
+#include "flash.h"
+#include "flash_fwd_launch_template.h"
+#include <iostream>
+
+// TODO: Switch back to handling bf16.
+// void run_mha_fwd(Flash_fwd_params &params, cudaStream_t stream) {
+//   FWD_HEADDIM_SWITCH(params.d, [&] {
+//     run_mha_fwd_<elem_type, kHeadDim>(params, stream);
+//   });
+// }
+
+// void run_mha_fwd(Flash_fwd_params &params, cudaStream_t stream) {
+//   FP16_SWITCH(!params.is_bf16, [&] {
+//     FWD_HEADDIM_SWITCH(params.d, [&] {
+//       run_mha_fwd_<elem_type, kHeadDim>(params, stream);
+//     });
+//   });
+// }
+
+// for debug
+void print_Qkv_params(Qkv_params &params) {
+  std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl;
+  std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl;
+  std::cout << "v_batch_stride: " << params.v_batch_stride << std::endl;
+  std::cout << "q_row_stride: " << params.q_row_stride << std::endl;
+  std::cout << "k_row_stride: " << params.k_row_stride << std::endl;
+  std::cout << "v_row_stride: " << params.v_row_stride << std::endl;
+  std::cout << "q_head_stride: " << params.q_head_stride << std::endl;
+  std::cout << "k_head_stride: " << params.k_head_stride << std::endl;
+  std::cout << "v_head_stride: " << params.v_head_stride << std::endl;
+  std::cout << "h: " << params.h << std::endl;
+  std::cout << "h_k: " << params.h_k << std::endl;
+  std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl;
+}
+
+void print_Flash_fwd_params(Flash_fwd_params &params) {
+  std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl;
+  std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl;
+  std::cout << "v_batch_stride: " << params.v_batch_stride << std::endl;
+  std::cout << "q_row_stride: " << params.q_row_stride << std::endl;
+  std::cout << "k_row_stride: " << params.k_row_stride << std::endl;
+  std::cout << "v_row_stride: " << params.v_row_stride << std::endl;
+  std::cout << "q_head_stride: " << params.q_head_stride << std::endl;
+  std::cout << "k_head_stride: " << params.k_head_stride << std::endl;
+  std::cout << "v_head_stride: " << params.v_head_stride << std::endl;
+  std::cout << "h: " << params.h << std::endl;
+  std::cout << "h_k: " << params.h_k << std::endl;
+  std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl;
+
+  std::cout << "o_batch_stride: " << params.o_batch_stride << std::endl;
+  std::cout << "o_row_stride: " << params.o_row_stride << std::endl;
+  std::cout << "o_head_stride: " << params.o_head_stride << std::endl;
+  std::cout << "b: " << params.b << std::endl;
+  std::cout << "seqlen_q: " << params.seqlen_q << std::endl;
+  std::cout << "seqlen_k: " << params.seqlen_k << std::endl;
+  std::cout << "d: " << params.d << std::endl;
+  std::cout << "seqlen_q_rounded: " << params.seqlen_q_rounded << std::endl;
+  std::cout << "seqlen_k_rounded: " << params.seqlen_k_rounded << std::endl;
+  std::cout << "d_rounded: " << params.d_rounded << std::endl;
+  std::cout << "scale_softmax: " << params.scale_softmax << std::endl;
+  std::cout << "scale_softmax_log2: " << params.scale_softmax_log2 << std::endl;
+  std::cout << "p_dropout: " << params.p_dropout << std::endl;
+  std::cout << "p_dropout_in_uint8_t: " << params.p_dropout_in_uint8_t
+            << std::endl;
+  std::cout << "rp_dropout: " << params.rp_dropout << std::endl;
+  std::cout << "scale_softmax_rp_dropout: " << params.scale_softmax_rp_dropout
+            << std::endl;
+  std::cout << "is_bf16: " << params.is_bf16 << std::endl;
+  std::cout << "is_causal: " << params.is_causal << std::endl;
+}
+
+void print_Flash_bwd_params(Flash_bwd_params &params) {
+  std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl;
+  std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl;
+  std::cout << "v_batch_stride: " <<
params.v_batch_stride << std::endl; + std::cout << "q_row_stride: " << params.q_row_stride << std::endl; + std::cout << "k_row_stride: " << params.k_row_stride << std::endl; + std::cout << "v_row_stride: " << params.v_row_stride << std::endl; + std::cout << "q_head_stride: " << params.q_head_stride << std::endl; + std::cout << "k_head_stride: " << params.k_head_stride << std::endl; + std::cout << "v_head_stride: " << params.v_head_stride << std::endl; + std::cout << "h: " << params.h << std::endl; + std::cout << "h_k: " << params.h_k << std::endl; + std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl; + + std::cout << "o_batch_stride: " << params.o_batch_stride << std::endl; + std::cout << "o_row_stride: " << params.o_row_stride << std::endl; + std::cout << "o_head_stride: " << params.o_head_stride << std::endl; + std::cout << "b: " << params.b << std::endl; + std::cout << "seqlen_q: " << params.seqlen_q << std::endl; + std::cout << "seqlen_k: " << params.seqlen_k << std::endl; + std::cout << "d: " << params.d << std::endl; + std::cout << "seqlen_q_rounded: " << params.seqlen_q_rounded << std::endl; + std::cout << "seqlen_k_rounded: " << params.seqlen_k_rounded << std::endl; + std::cout << "d_rounded: " << params.d_rounded << std::endl; + std::cout << "scale_softmax: " << params.scale_softmax << std::endl; + std::cout << "scale_softmax_log2: " << params.scale_softmax_log2 << std::endl; + std::cout << "p_dropout: " << params.p_dropout << std::endl; + std::cout << "p_dropout_in_uint8_t: " << params.p_dropout_in_uint8_t + << std::endl; + std::cout << "rp_dropout: " << params.rp_dropout << std::endl; + std::cout << "scale_softmax_rp_dropout: " << params.scale_softmax_rp_dropout + << std::endl; + std::cout << "is_bf16: " << params.is_bf16 << std::endl; + std::cout << "is_causal: " << params.is_causal << std::endl; + + std::cout << "do_batch_stride: " << params.do_batch_stride << std::endl; + std::cout << "do_row_stride: " << params.do_row_stride << std::endl; + std::cout << "do_head_stride: " << params.do_head_stride << std::endl; + std::cout << "dq_batch_stride: " << params.dq_batch_stride << std::endl; + std::cout << "dk_batch_stride: " << params.dk_batch_stride << std::endl; + std::cout << "dv_batch_stride: " << params.dv_batch_stride << std::endl; + std::cout << "dq_row_stride: " << params.dq_row_stride << std::endl; + std::cout << "dk_row_stride: " << params.dk_row_stride << std::endl; + std::cout << "dv_row_stride: " << params.dv_row_stride << std::endl; + std::cout << "dq_head_stride: " << params.dq_head_stride << std::endl; + std::cout << "dk_head_stride: " << params.dk_head_stride << std::endl; + std::cout << "dv_head_stride: " << params.dv_head_stride << std::endl; +} + +// Find the number of splits that maximizes the occupancy. For example, if we +// have batch * n_heads = 48 and we have 108 SMs, having 2 splits (efficiency = +// 0.89) is better than having 3 splits (efficiency = 0.67). However, we also +// don't want too many splits as that would incur more HBM reads/writes. So we +// find the best efficiency, then find the smallest number of splits that gets +// 85% of the best efficiency. 
+inline int num_splits_heuristic(int batch_nheads_mblocks, int num_SMs,
+                                int num_n_blocks, int max_splits) {
+  // If we have enough to almost fill the SMs, then just use 1 split
+  if (batch_nheads_mblocks >= 0.8f * num_SMs) {
+    return 1;
+  }
+  max_splits = std::min({max_splits, num_SMs, num_n_blocks});
+  float max_efficiency = 0.f;
+  std::vector<float> efficiency;
+  efficiency.reserve(max_splits);
+  auto ceildiv = [](int a, int b) { return (a + b - 1) / b; };
+  // Some splits are not eligible. For example, if we have 64 blocks and choose
+  // 11 splits, we'll have 6 * 10 + 4 blocks. If we choose 12 splits, we'll have
+  // 6 * 11 + (-2) blocks (i.e. it's 11 splits anyway). So we check if the
+  // number of blocks per split is the same as the previous num_splits.
+  auto is_split_eligible = [&ceildiv, &num_n_blocks](int num_splits) {
+    return num_splits == 1 || ceildiv(num_n_blocks, num_splits) !=
+                                  ceildiv(num_n_blocks, num_splits - 1);
+  };
+  for (int num_splits = 1; num_splits <= max_splits; num_splits++) {
+    if (!is_split_eligible(num_splits)) {
+      efficiency.push_back(0.f);
+    } else {
+      float n_waves = float(batch_nheads_mblocks * num_splits) / num_SMs;
+      float eff = n_waves / ceil(n_waves);
+      // printf("num_splits = %d, eff = %f\n", num_splits, eff);
+      if (eff > max_efficiency) {
+        max_efficiency = eff;
+      }
+      efficiency.push_back(eff);
+    }
+  }
+  for (int num_splits = 1; num_splits <= max_splits; num_splits++) {
+    if (!is_split_eligible(num_splits)) {
+      continue;
+    }
+    if (efficiency[num_splits - 1] >= 0.85 * max_efficiency) {
+      // printf("num_splits chosen = %d\n", num_splits);
+      return num_splits;
+    }
+  }
+  return 1;
+}
+
+void run_mha(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr,
+             void *softmax_lse_ptr, void *softmax_ptr, void *rng_state_ptr,
+
+             uint32_t q_batch_stride, uint32_t k_batch_stride,
+             uint32_t v_batch_stride, uint32_t o_batch_stride,
+
+             uint32_t q_row_stride, uint32_t k_row_stride,
+             uint32_t v_row_stride, uint32_t o_row_stride,
+
+             uint32_t q_head_stride, uint32_t k_head_stride,
+             uint32_t v_head_stride, uint32_t o_head_stride,
+
+             uint32_t b, uint32_t h, uint32_t h_k, uint32_t d,
+             uint32_t d_rounded, float softmax_scale,
+
+             uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded,
+             uint32_t seqlen_k_rounded,
+
+             float p_dropout, int window_size_left, int window_size_right,
+             cudaStream_t stream) {
+  Flash_fwd_params params;
+  // Reset the parameters
+  memset(&params, 0, sizeof(params));
+
+  // Set the pointers and strides.
+  params.q_ptr = q_ptr;
+  params.k_ptr = k_ptr;
+  params.v_ptr = v_ptr;
+  params.o_ptr = o_ptr;
+
+  params.softmax_lse_ptr = softmax_lse_ptr;
+
+  // All stride are in elements, not bytes.
+  params.q_batch_stride = q_batch_stride;
+  params.k_batch_stride = k_batch_stride;
+  params.v_batch_stride = v_batch_stride;
+  params.o_batch_stride = o_batch_stride;
+
+  params.q_row_stride = q_row_stride;
+  params.k_row_stride = k_row_stride;
+  params.v_row_stride = v_row_stride;
+  params.o_row_stride = o_row_stride;
+  params.q_head_stride = q_head_stride;
+  params.k_head_stride = k_head_stride;
+  params.v_head_stride = v_head_stride;
+  params.o_head_stride = o_head_stride;
+
+  // Set the dimensions.
+  params.b = b;
+  params.h = h;
+  params.h_k = h_k;
+  params.h_h_k_ratio = h / h_k;
+  params.seqlen_q = seqlen_q;
+  params.seqlen_k = seqlen_k;
+  params.seqlen_q_rounded = seqlen_q_rounded;
+  params.seqlen_k_rounded = seqlen_k_rounded;
+  params.d = d;
+  params.d_rounded = d_rounded;
+
+  // Set the different scale values.
+  params.scale_softmax = softmax_scale;
+  params.scale_softmax_log2 = softmax_scale * M_LOG2E;
+
+  params.p_dropout = 1.f - p_dropout; // probability to keep
+  params.p_dropout_in_uint8_t = uint8_t(std::floor(params.p_dropout * 255.0));
+  params.rp_dropout = 1.f / params.p_dropout;
+  params.scale_softmax_rp_dropout = params.rp_dropout * params.scale_softmax;
+  params.is_bf16 = 0;
+  params.cu_seqlens_q = nullptr;
+  params.cu_seqlens_k = nullptr;
+  params.seqused_k = nullptr;
+  params.p_ptr = softmax_ptr; // used for `return_softmax`.
+  params.rng_state = static_cast<uint64_t *>(rng_state_ptr);
+  params.is_causal = window_size_left < 0 && window_size_right == 0;
+
+  if (window_size_left < 0 && window_size_right >= 0) {
+    window_size_left = seqlen_k;
+  }
+  if (window_size_left >= 0 && window_size_right < 0) {
+    window_size_right = seqlen_k;
+  }
+  params.window_size_left = window_size_left;
+  params.window_size_right = window_size_right;
+  params.is_seqlens_k_cumulative = true;
+  params.alibi_slopes_ptr = nullptr;
+  // print_Flash_fwd_params(params);
+
+  FP16_SWITCH(!params.is_bf16, [&] {
+    FWD_HEADDIM_SWITCH(
+        params.d, [&] { run_mha_fwd_<elem_type, kHeadDim>(params, stream); });
+  });
+}
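+
+// Window-size convention used by run_mha, run_mha_bwd and
+// run_mha_fwd_with_kvcache: window_size_left < 0 together with
+// window_size_right == 0 encodes causal attention; e.g. a sliding window over
+// the previous 128 keys with no lookahead would be window_size_left = 128,
+// window_size_right = 0. A negative value on either side means "unbounded"
+// on that side and is widened to seqlen_k before launch.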
+
+void run_mha_bwd(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr,
+                 void *dout_ptr, void *dq_ptr, void *dk_ptr, void *dv_ptr,
+                 void *dq_accum_ptr,
+                 void *softmax_lse_ptr, void *dsoftmax_sum_ptr,
+                 void *rng_state_ptr,
+
+                 uint32_t q_batch_stride, uint32_t k_batch_stride,
+                 uint32_t v_batch_stride, uint32_t o_batch_stride,
+
+                 uint32_t q_row_stride, uint32_t k_row_stride,
+                 uint32_t v_row_stride, uint32_t o_row_stride,
+
+                 uint32_t q_head_stride, uint32_t k_head_stride,
+                 uint32_t v_head_stride, uint32_t o_head_stride,
+
+                 uint32_t b, uint32_t h, uint32_t h_k, uint32_t d,
+                 uint32_t d_rounded, float softmax_scale,
+
+                 uint32_t seqlen_q, uint32_t seqlen_k,
+                 uint32_t seqlen_q_rounded, uint32_t seqlen_k_rounded,
+
+                 float p_dropout, int window_size_left, int window_size_right,
+                 cudaStream_t stream) {
+  Flash_bwd_params params;
+  // Reset the parameters
+  memset(&params, 0, sizeof(params));
+
+  // Set the pointers and strides.
+  params.q_ptr = q_ptr;
+  params.k_ptr = k_ptr;
+  params.v_ptr = v_ptr;
+  params.o_ptr = o_ptr;
+
+  params.dq_ptr = dq_ptr;
+  params.dk_ptr = dk_ptr;
+  params.dv_ptr = dv_ptr;
+  params.do_ptr = dout_ptr;
+
+  params.dq_accum_ptr = dq_accum_ptr;
+  params.dk_accum_ptr = nullptr;
+  params.dv_accum_ptr = nullptr;
+
+  params.softmax_lse_ptr = softmax_lse_ptr;
+
+  // All stride are in elements, not bytes.
+  params.q_batch_stride = q_batch_stride;
+  params.k_batch_stride = k_batch_stride;
+  params.v_batch_stride = v_batch_stride;
+  params.o_batch_stride = o_batch_stride;
+
+  params.q_row_stride = q_row_stride;
+  params.k_row_stride = k_row_stride;
+  params.v_row_stride = v_row_stride;
+  params.o_row_stride = o_row_stride;
+  params.q_head_stride = q_head_stride;
+  params.k_head_stride = k_head_stride;
+  params.v_head_stride = v_head_stride;
+  params.o_head_stride = o_head_stride;
+
+  params.dq_batch_stride = q_batch_stride;
+  params.dk_batch_stride = k_batch_stride;
+  params.dv_batch_stride = v_batch_stride;
+  params.do_batch_stride = o_batch_stride;
+
+  params.dq_row_stride = q_row_stride;
+  params.dk_row_stride = k_row_stride;
+  params.dv_row_stride = v_row_stride;
+  params.do_row_stride = o_row_stride;
+  params.dq_head_stride = q_head_stride;
+  params.dk_head_stride = k_head_stride;
+  params.dv_head_stride = v_head_stride;
+  params.do_head_stride = o_head_stride;
+
+  // Set the dimensions.
+  params.b = b;
+  params.h = h;
+  params.h_k = h_k;
+  params.h_h_k_ratio = h / h_k;
+  params.seqlen_q = seqlen_q;
+  params.seqlen_k = seqlen_k;
+  params.seqlen_q_rounded = seqlen_q_rounded;
+  params.seqlen_k_rounded = seqlen_k_rounded;
+  params.d = d;
+  params.d_rounded = d_rounded;
+  params.is_causal = window_size_left < 0 && window_size_right == 0;
+  if (window_size_left < 0 && window_size_right >= 0) {
+    window_size_left = seqlen_k;
+  }
+  if (window_size_left >= 0 && window_size_right < 0) {
+    window_size_right = seqlen_k;
+  }
+  params.window_size_left = window_size_left;
+  params.window_size_right = window_size_right;
+  params.is_seqlens_k_cumulative = true;
+
+  // Set the different scale values.
+  params.scale_softmax = softmax_scale;
+  params.scale_softmax_log2 = softmax_scale * M_LOG2E;
+
+  params.p_dropout = 1.f - p_dropout; // probability to keep
+  params.p_dropout_in_uint8_t = uint8_t(std::floor(params.p_dropout * 255.0));
+  params.rp_dropout = 1.f / params.p_dropout;
+  params.scale_softmax_rp_dropout = params.rp_dropout * params.scale_softmax;
+  params.is_bf16 = 0;
+  params.cu_seqlens_q = nullptr;
+  params.cu_seqlens_k = nullptr;
+  params.seqused_k = nullptr;
+  params.p_ptr = nullptr; // used for `return_softmax`, no use in bwd
+  params.dsoftmax_sum = dsoftmax_sum_ptr;
+  params.rng_state = static_cast<uint64_t *>(rng_state_ptr);
+  params.alibi_slopes_ptr = nullptr;
+  // print_Flash_bwd_params(params);
+
+  bool configure = false;
+  FP16_SWITCH(!params.is_bf16, [&] {
+    if (params.d <= 32) {
+      run_mha_bwd_<elem_type, 32>(params, stream, configure);
+    } else if (params.d <= 64) {
+      run_mha_bwd_<elem_type, 64>(params, stream, configure);
+    } else if (params.d <= 96) {
+      run_mha_bwd_<elem_type, 96>(params, stream, configure);
+    } else if (params.d <= 128) {
+      run_mha_bwd_<elem_type, 128>(params, stream, configure);
+    } else if (params.d <= 160) {
+      run_mha_bwd_<elem_type, 160>(params, stream, configure);
+    } else if (params.d <= 192) {
+      run_mha_bwd_<elem_type, 192>(params, stream, configure);
+    } else if (params.d <= 224) {
+      run_mha_bwd_<elem_type, 224>(params, stream, configure);
+    } else if (params.d <= 256) {
+      run_mha_bwd_<elem_type, 256>(params, stream, configure);
+    }
+  });
+}
+
+void run_mha_fwd_with_kvcache(
+    void *q_ptr, void *k_ptr, void *v_ptr, void *knew_ptr, void *vnew_ptr,
+    void *seqlens_k_, void *o_ptr, void *softmax_lse_ptr,
+
+    uint32_t q_batch_stride, uint32_t k_batch_stride, uint32_t v_batch_stride,
+    uint32_t knew_batch_stride, uint32_t vnew_batch_stride,
+    uint32_t o_batch_stride,
+
+    uint32_t q_row_stride, uint32_t k_row_stride, uint32_t v_row_stride,
+    uint32_t knew_row_stride, uint32_t vnew_row_stride, uint32_t o_row_stride,
+
+    uint32_t q_head_stride, uint32_t k_head_stride, uint32_t v_head_stride,
+    uint32_t knew_head_stride, uint32_t vnew_head_stride,
+    uint32_t o_head_stride,
+
+    uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, uint32_t d_rounded,
+    uint32_t seqlen_knew, float softmax_scale,
+
+    uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded,
+    uint32_t seqlen_k_rounded,
+
+    int window_size_left, int window_size_right, cudaStream_t stream) {
+  Flash_fwd_params params;
+  // Reset the parameters
+  memset(&params, 0, sizeof(params));
+
+  // Set the pointers and strides.
+  params.q_ptr = q_ptr;
+  params.k_ptr = k_ptr;
+  params.v_ptr = v_ptr;
+  params.o_ptr = o_ptr;
+
+  params.softmax_lse_ptr = softmax_lse_ptr;
+
+  // All stride are in elements, not bytes.
+  params.q_batch_stride = q_batch_stride;
+  params.k_batch_stride = k_batch_stride;
+  params.v_batch_stride = v_batch_stride;
+  params.o_batch_stride = o_batch_stride;
+
+  params.q_row_stride = q_row_stride;
+  params.k_row_stride = k_row_stride;
+  params.v_row_stride = v_row_stride;
+  params.o_row_stride = o_row_stride;
+  params.q_head_stride = q_head_stride;
+  params.k_head_stride = k_head_stride;
+  params.v_head_stride = v_head_stride;
+  params.o_head_stride = o_head_stride;
+
+  // Set the dimensions.
+  params.b = b;
+  params.h = h;
+  params.h_k = h_k;
+  params.h_h_k_ratio = h / h_k;
+  params.seqlen_q = seqlen_q;
+  params.seqlen_k = seqlen_k;
+  params.seqlen_q_rounded = seqlen_q_rounded;
+  params.seqlen_k_rounded = seqlen_k_rounded;
+  params.d = d;
+  params.d_rounded = d_rounded;
+
+  // Set the different scale values.
+  params.scale_softmax = softmax_scale;
+  params.scale_softmax_log2 = softmax_scale * M_LOG2E;
+
+  params.p_dropout = 1.f; // probability to keep
+  params.p_dropout_in_uint8_t = uint8_t(std::floor(params.p_dropout * 255.0));
+  params.rp_dropout = 1.f / params.p_dropout;
+  params.scale_softmax_rp_dropout = params.rp_dropout * params.scale_softmax;
+
+  params.is_bf16 = 0;
+  params.cu_seqlens_q = nullptr;
+  params.cu_seqlens_k = static_cast<int *>(seqlens_k_);
+  params.seqused_k = nullptr;
+  params.p_ptr = nullptr; // used for `return_softmax`.
+  params.rng_state = nullptr;
+  params.alibi_slopes_ptr = nullptr;
+  params.is_causal = window_size_left < 0 && window_size_right == 0;
+
+  if (window_size_left < 0 && window_size_right >= 0) {
+    window_size_left = seqlen_k;
+  }
+  if (window_size_left >= 0 && window_size_right < 0) {
+    window_size_right = seqlen_k;
+  }
+  params.window_size_left = window_size_left;
+  params.window_size_right = window_size_right;
+  params.is_seqlens_k_cumulative = false;
+
+  params.seqlen_knew = seqlen_knew;
+  params.knew_ptr = knew_ptr;
+  params.vnew_ptr = vnew_ptr;
+  // All stride are in elements, not bytes.
+  params.knew_batch_stride = knew_batch_stride;
+  params.vnew_batch_stride = vnew_batch_stride;
+  params.knew_row_stride = knew_row_stride;
+  params.vnew_row_stride = vnew_row_stride;
+  params.knew_head_stride = knew_head_stride;
+  params.vnew_head_stride = vnew_head_stride;
+
+  // TODO: ROPE support TBD
+  params.rotary_dim = 0;
+
+  // This needs to match with run_mha_fwd_splitkv_dispatch
+  // const int head_size = round_multiple(head_size_og, 8);
+  const int block_n = d <= 64 ? 256 : (d <= 128 ? 128 : 64);
+  const int num_n_blocks = (seqlen_k + block_n - 1) / block_n;
+  // Technically kBlockM = 64 only for the splitKV kernels, not the standard
+  // kernel. In any case we don't expect seqlen_q to be larger than 64 for
+  // inference.
+  const int num_m_blocks = (seqlen_q + 64 - 1) / 64;
+  // cudaDeviceProp dprops;
+  // cudaGetDeviceProperties(&dprops, 0);
+  // params.num_splits = num_splits_heuristic(
+  //     b * h_k * num_m_blocks, dprops->multiProcessorCount, num_n_blocks,
+  //     128);
+  // static_assert(params.num_splits <= 128 && "num_splits > 128 not
+  // supported");
+  params.num_splits = 1;
+  // TODO: support > 1 split
+  // if (params.num_splits > 1) {
+  //   at::Tensor softmax_lse_accum =
+  //       torch::empty({params.num_splits, batch_size, num_heads, seqlen_q},
+  //                    opts.dtype(at::kFloat));
+  //   at::Tensor out_accum = torch::empty(
+  //       {params.num_splits, batch_size, num_heads, seqlen_q,
+  //        head_size_rounded}, opts.dtype(at::kFloat));
+  //   params.softmax_lseaccum_ptr = softmax_lse_accum.data_ptr();
+  //   params.oaccum_ptr = out_accum.data_ptr();
+  // }
+  // print_Flash_fwd_params(params);
+
+  FP16_SWITCH(!params.is_bf16, [&] {
+    FWD_HEADDIM_SWITCH(params.d, [&] {
+      run_mha_fwd_splitkv_dispatch<elem_type, kHeadDim>(params, stream);
+    });
+  });
+}
+
+int64_t getIntFromVoidPtr(void *data, size_t &pos) {
+  int64_t *intPtr =
+      reinterpret_cast<int64_t *>(static_cast<char *>(data) + pos);
+  pos += sizeof(int64_t);
+  return *intPtr;
+}
+
+float getFloatFromVoidPtr(void *data, size_t &pos) {
+  float *floatPtr = reinterpret_cast<float *>(static_cast<char *>(data) + pos);
+  pos += sizeof(float);
+  return *floatPtr;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void run_flash_attn_fwd(void **tensors, void *extra_args, cudaStream_t stream) {
+  size_t pos = 0;
+  auto q_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto b = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto h = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto h_k = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto d = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto d_rounded = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto softmax_scale = static_cast<float>(getFloatFromVoidPtr(extra_args, pos));
+  auto seqlen_q = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_q_rounded =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k_rounded =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto p_dropout = static_cast<float>(getFloatFromVoidPtr(extra_args, pos));
+  auto window_size_left =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto window_size_right =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+
+  // tensors: q, k, v, out, softmax_lse, softmax (p), rng_state
+  run_mha(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4],
+          tensors[5], tensors[6],
+          /*q_batch_stride*/ q_batch_stride,
+          /*k_batch_stride*/ k_batch_stride,
+          /*v_batch_stride*/ v_batch_stride,
+          /*o_batch_stride*/ o_batch_stride,
+          /*q_row_stride*/ q_row_stride,
+          /*k_row_stride*/ k_row_stride,
+          /*v_row_stride*/ v_row_stride,
+          /*o_row_stride*/ o_row_stride,
+          /*q_head_stride*/ q_head_stride,
+          /*k_head_stride*/ k_head_stride,
+          /*v_head_stride*/ v_head_stride,
+          /*o_head_stride*/ o_head_stride,
+          /*b*/ b,
+          /*h*/ h,
+          /*h_k*/ h_k,
+          /*d*/ d,
+          /*d_rounded*/ d_rounded,
+          /*softmax_scale*/ softmax_scale,
+          /*seqlen_q*/ seqlen_q,
+          /*seqlen_k*/ seqlen_k,
+          /*seqlen_q_rounded*/ seqlen_q_rounded,
+          /*seqlen_k_rounded*/ seqlen_k_rounded,
+          /*p_dropout*/ p_dropout,
+          /*window_size_left*/ window_size_left,
+          /*window_size_right*/ window_size_right, stream);
+}
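+
+// Packing sketch (illustrative, not a function in this file): callers are
+// expected to lay out `extra_args` as the flat byte sequence the readers
+// above consume -- 8-byte integers for every integer field and 4-byte floats
+// at the float positions, in declaration order. For run_flash_attn_fwd that
+// is: 12 strides, b, h, h_k, d, d_rounded (int64_t), softmax_scale (float),
+// the 4 seqlen fields (int64_t), p_dropout (float), then the two window
+// sizes (int64_t).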
+
+void run_flash_attn_bwd(void **tensors, void *extra_args, cudaStream_t stream) {
+  size_t pos = 0;
+  auto q_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto b = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto h = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto h_k = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto d = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto d_rounded = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto softmax_scale = static_cast<float>(getFloatFromVoidPtr(extra_args, pos));
+  auto seqlen_q = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_q_rounded =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k_rounded =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto p_dropout = static_cast<float>(getFloatFromVoidPtr(extra_args, pos));
+  auto window_size_left =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto window_size_right =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+
+  // tensors: dout, q, k, v, out, softmax_lse, rng_state, dq, dk, dv,
+  // d_softmax, dq_accum
+  run_mha_bwd(tensors[1], tensors[2], tensors[3], tensors[4], tensors[0],
+              tensors[7], tensors[8], tensors[9], tensors[11], tensors[5],
+              tensors[10], tensors[6],
+              /*q_batch_stride*/ q_batch_stride,
+              /*k_batch_stride*/ k_batch_stride,
+              /*v_batch_stride*/ v_batch_stride,
+              /*o_batch_stride*/ o_batch_stride,
+              /*q_row_stride*/ q_row_stride,
+              /*k_row_stride*/ k_row_stride,
+              /*v_row_stride*/ v_row_stride,
+              /*o_row_stride*/ o_row_stride,
+              /*q_head_stride*/ q_head_stride,
+              /*k_head_stride*/ k_head_stride,
+              /*v_head_stride*/ v_head_stride,
+              /*o_head_stride*/ o_head_stride,
+              /*b*/ b,
+              /*h*/ h,
+              /*h_k*/ h_k,
+              /*d*/ d,
+              /*d_rounded*/ d_rounded,
+              /*softmax_scale*/ softmax_scale,
+              /*seqlen_q*/ seqlen_q,
+              /*seqlen_k*/ seqlen_k,
+              /*seqlen_q_rounded*/ seqlen_q_rounded,
+              /*seqlen_k_rounded*/ seqlen_k_rounded,
+              /*p_dropout*/ p_dropout,
+              /*window_size_left*/ window_size_left,
+              /*window_size_right*/ window_size_right, stream);
+}
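+
+// tensors: q, kcache, vcache, knew, vnew, seqlens_k, out, softmax_lse
+// (the order mirrors the run_mha_fwd_with_kvcache parameter list above).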
+void run_flash_attn_kvcache(void **tensors, void *extra_args,
+                            cudaStream_t stream) {
+  size_t pos = 0;
+  auto q_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto knew_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto vnew_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_batch_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto knew_row_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto vnew_row_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_row_stride = static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto q_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto k_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto v_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto knew_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto vnew_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto o_head_stride =
+      static_cast<uint32_t>(getIntFromVoidPtr(extra_args, pos));
+  auto b = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto h = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto h_k = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto d = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto d_rounded = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_knew = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto softmax_scale = static_cast<float>(getFloatFromVoidPtr(extra_args, pos));
+  auto seqlen_q = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_q_rounded =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto seqlen_k_rounded =
+      static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto window_size_left = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+  auto window_size_right = static_cast<int>(getIntFromVoidPtr(extra_args, pos));
+
+  run_mha_fwd_with_kvcache(tensors[0], tensors[1], tensors[2], tensors[3],
+                           tensors[4], tensors[5], tensors[6], tensors[7],
+                           /*q_batch_stride*/ q_batch_stride,
+                           /*k_batch_stride*/ k_batch_stride,
+                           /*v_batch_stride*/ v_batch_stride,
+                           /*knew_batch_stride*/ knew_batch_stride,
+                           /*vnew_batch_stride*/ vnew_batch_stride,
+                           /*o_batch_stride*/ o_batch_stride,
+                           /*q_row_stride*/ q_row_stride,
+                           /*k_row_stride*/ k_row_stride,
+                           /*v_row_stride*/ v_row_stride,
+                           /*knew_row_stride*/ knew_row_stride,
+                           /*vnew_row_stride*/ vnew_row_stride,
+                           /*o_row_stride*/ o_row_stride,
+                           /*q_head_stride*/ q_head_stride,
+                           /*k_head_stride*/ k_head_stride,
+                           /*v_head_stride*/ v_head_stride,
+                           /*knew_head_stride*/ knew_head_stride,
+                           /*vnew_head_stride*/ vnew_head_stride,
+                           /*o_head_stride*/ o_head_stride,
+                           /*b*/ b,
+                           /*h*/ h,
+                           /*h_k*/ h_k,
+                           /*d*/ d,
+                           /*d_rounded*/ d_rounded,
+                           /*seqlen_knew*/ seqlen_knew,
+                           /*softmax_scale*/ softmax_scale,
+                           /*seqlen_q*/ seqlen_q,
+                           /*seqlen_k*/ seqlen_k,
+                           /*seqlen_q_rounded*/ seqlen_q_rounded,
+                           /*seqlen_k_rounded*/ seqlen_k_rounded,
+                           /*window_size_left*/ window_size_left,
+                           /*window_size_right*/ window_size_right, stream);
+}
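+// Note on the kvcache variant: the new k/v chunks ("knew"/"vnew") and the
+// caches share the head dim d but have independent sequence extents, which is
+// why their strides are packed separately above. A sketch for contiguous
+// buffers (`capacity` is the cache's maximum sequence length; the name is
+// illustrative, not from this runtime):
+//
+//   knew_head_stride  = d;  knew_row_stride = h_k * d;
+//   knew_batch_stride = int64_t(seqlen_knew) * h_k * d;
+//   k_head_stride     = d;  k_row_stride    = h_k * d;
+//   k_batch_stride    = int64_t(capacity) * h_k * d;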
+#ifdef __cplusplus
+}
+#endif
diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_bf16_sm80.cu
new file mode 100644
index 000000000..78f4793b9
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_bwd_launch_template.h"
+
+template<>
+void run_mha_bwd_<cutlass::bfloat16_t, 128>(Flash_bwd_params &params, cudaStream_t stream, const bool configure) {
+    run_mha_bwd_hdim128<cutlass::bfloat16_t>(params, stream, configure);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_fp16_sm80.cu
new file mode 100644
index 000000000..641cac0e3
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim128_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_bwd_launch_template.h"
+
+template<>
+void run_mha_bwd_<cutlass::half_t, 128>(Flash_bwd_params &params, cudaStream_t stream, const bool configure) {
+    run_mha_bwd_hdim128<cutlass::half_t>(params, stream, configure);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_bf16_sm80.cu
new file mode 100644
index 000000000..ad763a6a8
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_bwd_launch_template.h"
+
+template<>
+void run_mha_bwd_<cutlass::bfloat16_t, 160>(Flash_bwd_params &params, cudaStream_t stream, const bool configure) {
+    run_mha_bwd_hdim160<cutlass::bfloat16_t>(params, stream, configure);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_fp16_sm80.cu
new file mode 100644
index 000000000..23d814595
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim160_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_bwd_launch_template.h"
+
+template<>
+void run_mha_bwd_<cutlass::half_t, 160>(Flash_bwd_params &params, cudaStream_t stream, const bool configure) {
+    run_mha_bwd_hdim160<cutlass::half_t>(params, stream, configure);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_bf16_sm80.cu
new file mode 100644
index 000000000..82dafe7c3
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim192(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_fp16_sm80.cu new file mode 100644 index 000000000..55dcab4d8 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim192_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim192(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_bf16_sm80.cu new file mode 100644 index 000000000..e987c0039 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim224(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_fp16_sm80.cu new file mode 100644 index 000000000..37430ba5e --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim224_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim224(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_bf16_sm80.cu new file mode 100644 index 000000000..6d4b10ee3 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim256(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_fp16_sm80.cu new file mode 100644 index 000000000..0a214429a --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim256_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. 
See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim256(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_bf16_sm80.cu new file mode 100644 index 000000000..a7a1506c4 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim32(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_fp16_sm80.cu new file mode 100644 index 000000000..b2281ee1d --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim32_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim32(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_bf16_sm80.cu new file mode 100644 index 000000000..464bf9b29 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim64(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_fp16_sm80.cu new file mode 100644 index 000000000..f2439a29b --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim64_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim64(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_bf16_sm80.cu new file mode 100644 index 000000000..1234ff41a --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. 
See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim96(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_fp16_sm80.cu new file mode 100644 index 000000000..c21f90671 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_hdim96_fp16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_bwd_launch_template.h" + +template<> +void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + run_mha_bwd_hdim96(params, stream, configure); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h b/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h new file mode 100644 index 000000000..21212410a --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h @@ -0,0 +1,1639 @@ +/*************************************************************************************************** + * Copyright (c) 2023, Tri Dao. + ******************************************************************************/ + +#pragma once + +#include + +#include +#include +#include + +#include "block_info.h" +#include "kernel_traits.h" +#include "utils.h" +#include "softmax.h" + +#include "alibi.h" + +namespace flash { + +using namespace cute; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTE_HOST_DEVICE +auto +make_tiled_copy_B_warpcontiguousN(Copy_Atom const& copy_atom, + TiledMMA const& tiled_mma) { + using TileShape_MNK = typename TiledMMA::TiledShape_MNK; + using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; + constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; + // Divide by 2 because right now we always use 2 for the ValLayout + constexpr int kNWarpsN = decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; + constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; + // This gives the correct layout, idk why. 
+ // auto t = make_tile(Layout, _2>, + // Stride, _8> >{}, + // auto t = make_tile(Layout, + // Stride<_1, _64, _8> >{}, + auto t = make_tile(Layout, Int, _2>, // (8, 2, 2) or (8, 4, 2) + Stride<_1, Int, _8> >{}, // (1, 64, 8) or (1, 32, 8) + make_layout(size<2>(TileShape_MNK{}))); + // if (cute::thread0()) {printf("make_tiled_copy_B_warpcontiguousN "); print(t); printf("\n"); } + return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutB_TV(), t); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTE_HOST_DEVICE +auto +make_tiled_copy_C_warpcontiguousN(Copy_Atom const& copy_atom, + TiledMMA const& tiled_mma) { + using TileShape_MNK = typename TiledMMA::TiledShape_MNK; + using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; + constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; + // Divide by 2 because right now we always use 2 for the ValLayout + constexpr int kNWarpsN = decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; + constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; + auto t = make_tile(make_layout(size<0>(TileShape_MNK{})), + Layout, Int, _2>, // (8, 2, 2) or (8, 4, 2) + Stride<_1, Int, _8> >{}); // (1, 64, 8) or (1, 32, 8) + // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousN "); print(t); printf("\n"); } + return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void dot_do_o(Tensor const &do_, Tensor const &o, + Tensor &dP_sum, const int gdP_col_stride, const float scale) { + static_assert(Layout0::rank == 3, "Only support 3D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(do_.layout() == o.layout()); + // Reshape do_ and o from (8, kBlockM / 32, kHeadDim / 64) to (kBlockM / 32, 8 * kHeadDim / 64) + // The last coordinate is the "page". + Tensor do_reshaped = make_tensor(do_.data(), make_layout(get<1>(do_.layout()), + make_layout(get<0>(do_.layout()), + get<2>(do_.layout())))); + Tensor o_reshaped = make_tensor(o.data(), do_reshaped.layout()); + Tensor do_fp32 = flash::convert_type(do_reshaped); + Tensor o_fp32 = flash::convert_type(o_reshaped); + #pragma unroll + for (int mi = 0; mi < size<0>(do_reshaped); ++mi) { + float dP_sum_cur = do_fp32(mi, 0) * o_fp32(mi, 0); + #pragma unroll + for (int ni = 1; ni < size<1>(do_reshaped); ni++) { + dP_sum_cur += do_fp32(mi, ni) * o_fp32(mi, ni); + } + flash::SumOp sum_op; + dP_sum_cur = flash::Allreduce::run(dP_sum_cur, sum_op) * scale; + if (threadIdx.x % THREADS_PER_ROW == 0) { + dP_sum(mi * gdP_col_stride + threadIdx.x / THREADS_PER_ROW) = dP_sum_cur; + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Just compute dot(do, o) and write the result (softmax_d) to global memory as a separate kernel. +// This is used in the case where we want to parallelize the backward across seqlen_k. +template +inline __device__ void compute_dot_do_o(const Params ¶ms) { + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. 
+ const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q) return; + + const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; + const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) + + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded; + const index_t row_offset_dpsum = (bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM; + + Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + Tensor dP_sum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + row_offset_dpsum), + Shape>{}, Stride<_1>{}); + + typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + // TODO: careful, we're zeroing out dQaccum with type float4, but when + // we do atomicAdds, we use type float. The layouts are different. Check this. + typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); + + Tensor cdO = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdOcdO = gmem_thr_copy_dO.partition_S(cdO); + + // Allocate predicate tensors for k + Tensor tdOpdO = make_tensor(make_shape(size<2>(tdOgdO))); + // Set predicates for k bounds + #pragma unroll + for (int k = 0; k < size(tdOpdO); ++k) {tdOpdO(k) = get<1>(tdOcdO(0, 0, k)) < params.d;} + + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tdOcdO, tdOpdO, binfo.actual_seqlen_q - m_block * kBlockM + ); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tdOcdO, tdOpdO, binfo.actual_seqlen_q - m_block * kBlockM + ); + // By right we need to scale dP up by 1/p_dropout, but instead we don't and only scale the final + // results (dQ and dK) by 1/p_dropout. So we need to keep dP_sum scaled down by p_dropout here, + // so that (dP - dP_sum) is on the same scale. + dot_do_o(tdOrdO, tdOrO, dP_sum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); + if (Clear_dQaccum) { + // We're actually not zero'ing out all of dQaccum, but only the part that we're going to + // do atomicAdds on. 
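+        // (Annotation: "the part we do atomicAdds on" is the kBlockM x
+        // kHeadDim slab of dQaccum addressed by tdQgdQaccum above, i.e. the
+        // rows this (m_block, bidb, bidh) block will later accumulate into.)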
+ Tensor zero = make_fragment_like(tdQgdQaccum); + clear(zero); + cute::copy(gmem_tiled_copy_dQaccum, zero, tdQgdQaccum); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void clear_dKVaccum(const Params ¶ms) { + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + const int n_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) return; + + const index_t row_offset_dkv_accum = ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + n_block * kBlockN) * params.d_rounded; + + Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + + typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); + Tensor zero = make_fragment_like(tdKgdKaccum); + clear(zero); + cute::copy(gmem_tiled_copy_dKVaccum, zero, tdKgdKaccum); + cute::copy(gmem_tiled_copy_dKVaccum, zero, tdVgdVaccum); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Convert dQ from dQaccum (in float) to fp16/bf16. +// This is used in the case where we want to parallelize the backward across seqlen_k. +template +inline __device__ void convert_dQ(const Params ¶ms, const int nsplits) { + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q) return; + + const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; + const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) + + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 
0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded; + + Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + + Tensor sdQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutdQ{}); + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_S(gdQaccum); + + Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K + CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); + + Tensor tdQrdQaccum = make_fragment_like(tdQgdQaccum); + clear(acc_dq); + for (int s = 0; s < nsplits; ++s) { + cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, tdQrdQaccum); + #pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) += tdQrdQaccum(i); } + tdQgdQaccum.data() = tdQgdQaccum.data() + params.dq_accum_split_stride; + } + #pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); + __syncthreads(); + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + + Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); + Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); + #pragma unroll + for (int k = 0; k < size(tdQpdQ); ++k) { tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, tdQpdQ, binfo.actual_seqlen_q - m_block * kBlockM + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Convert dK and dV from dKaccum and dVaccum (in float) to fp16/bf16. +// This is used in the case where we want to parallelize the backward across seqlen_q. +template +inline __device__ void convert_dKV(const Params ¶ms) { + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + const int n_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. 
+ const int tidx = threadIdx.x; + + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) return; + + const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + const index_t row_offset_dkv_accum = ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + + n_block * kBlockN) * params.d_rounded; + + Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, + Stride, _1>{}); + Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, + Stride, _1>{}); + + Tensor sdK = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutdKV{}); + Tensor sdV = make_tensor(sdK.data() + size(sdK), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto smem_tiled_copy_dKV = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); + auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor taccdKsdK = smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor taccdVsdV = smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + Tensor tdKsdK = gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVsdV = gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_S(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_S(gdVaccum); + + Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + CUTE_STATIC_ASSERT_V(size(acc_dk) == size(tdKgdKaccum)); + CUTE_STATIC_ASSERT_V(size(acc_dv) == size(tdVgdVaccum)); + + Tensor tdKrdKaccum = make_fragment_like(tdKgdKaccum); + Tensor tdVrdVaccum = make_fragment_like(tdVgdVaccum); + cute::copy(gmem_tiled_copy_dKVaccum, tdKgdKaccum, tdKrdKaccum); + cute::copy(gmem_tiled_copy_dKVaccum, tdVgdVaccum, tdVrdVaccum); + #pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { + acc_dk(i) = tdKrdKaccum(i) * params.scale_softmax_rp_dropout; + } + #pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { + acc_dv(i) = tdVrdVaccum(i) * params.rp_dropout; + } + // Convert acc_dk from fp32 to fp16 + Tensor rdK = flash::convert_type(acc_dk); 
+ Tensor rdV = flash::convert_type(acc_dv); + Tensor taccdKrdK = smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdVrdV = smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); + cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); + __syncthreads(); + Tensor tdKrdK = make_tensor(shape(tdKgdK)); + Tensor tdVrdV = make_tensor(shape(tdVgdV)); + cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); + cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); + + Tensor cdKV = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); + Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); + #pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + flash::copy( + gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_dq_dk_dv_1colblock(const Params ¶ms, const int bidb, const int bidh, const int n_block) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + // constexpr int kNWarps = Kernel_traits::kNWarps; + constexpr int MMA_N_SdP = kBlockN / decltype(size<1>(typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; + constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; + constexpr bool Double_buffer = !Kernel_traits::No_double_buffer; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) return; + + int m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM); + if (Is_local) { + m_block_max = std::min(m_block_max, cute::ceil_div((n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k + params.window_size_left, kBlockM)); + } + + const index_t row_offset_q = binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.q_row_stride + bidh * params.q_head_stride; + const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + + n_block * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + + n_block * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride; + const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.do_row_stride + bidh * params.do_head_stride; + const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + 
+ (m_block_max - 1) * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; + const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) + + ((m_block_max - 1) * kBlockM + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded + // If deterministic, each thread block will do atomicAdd to a different dQ_accum buffer. + + (!params.deterministic ? 0 : blockIdx.x * params.dq_accum_split_stride); + const index_t row_offset_lse = (bidb * params.h + bidh) * params.seqlen_q + + (m_block_max - 1) * kBlockM; + const index_t row_offset_dpsum = (bidb * params.h + bidh) * params.seqlen_q_rounded + + (m_block_max - 1) * kBlockM; + + Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor(make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + Tensor gV = make_tensor(make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + row_offset_lse), + Shape>{}, Stride<_1>{}); + Tensor gdPsum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + row_offset_dpsum), + Shape>{}, Stride<_1>{}); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sQt = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sQtNoSwizzle = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + // Double buffer for sQ + Tensor sdO = make_tensor(sQ.data() + (Double_buffer ? 2 : 1) * size(sQ), typename Kernel_traits::SmemLayoutQdO{}); + Tensor sdOt = make_tensor(sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sdOtransposedNoSwizzle = make_tensor(sdO.data(), + typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sK = make_tensor(sdO.data() + size(sdO), typename Kernel_traits::SmemLayoutKV{}); + Tensor sV = make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); + Tensor sKt = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); + Tensor sKtNoSwizzle = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); + Tensor sdS = make_tensor(!Kernel_traits::Is_V_in_regs ? 
sV.data() + size(sV) : sK.data() + size(sK), + typename Kernel_traits::SmemLayoutPdS{}); + Tensor sdSt = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sdStNoSwizzle = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sP = make_tensor(sdS.data() + size(sdS), typename Kernel_traits::SmemLayoutPdS{}); + Tensor sPt = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sPtNoSwizzle = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + // sP and sdQ share the same memory so be careful + Tensor sdQ = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutdQ{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + using GmemTiledCopydO = std::conditional_t< + Is_first, + typename Kernel_traits::GmemTiledCopydO, + typename Kernel_traits::GmemTiledCopyQKV + >; + GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + using GmemLayoutAtomdQaccum = std::conditional_t< + !Seq_parallel, + typename Kernel_traits::GmemTiledCopydQaccum, + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd + >; + GmemLayoutAtomdQaccum gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); + // if (cute::thread0()) { print(tdQgdQaccum.layout()); printf("\n"); } + // __syncthreads(); + // if (blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 && tidx < 64) { + // printf("tidx = %d, tdQgdQaccum = 0x%p\n", tidx, tdQgdQaccum.data()); + // } + + typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; + auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); + Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) + Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) + Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); + Tensor tdKrdSt = thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdKrQt = thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) + Tensor tdVrPt = thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdVrdO = thr_mma_dkv.partition_fragment_B(sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); + Tensor tdQrdS = 
thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) + Tensor tdQrKt = thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) + + Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_QdO = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); + Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); + Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); + + // auto smem_thr_copy_KV = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp).get_thread_slice(tidx); + auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_KV.partition_S(sK); + // if (cute::thread(0, 0) && n_block == 0) { printf("sK layout: "); print(sK.layout()); printf("\n"); } + // if (cute::thread(0, 0) && n_block == 0) { print(tSsK.layout()); printf("\n"); } + Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); + + // Partition sP and sdS to match the accumulator partitioning + // This has to be tiled_mma_sdp, not tiled_mma_dkv + // auto smem_thr_copy_PdS = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp).get_thread_slice(tidx); + auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); + auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); + Tensor tPsP = smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) + // if (cute::thread(0, 0) && n_block == 0) { printf("sP layout: "); print(sP.layout()); printf("\n"); } + // if (cute::thread(0, 0) && n_block == 0) { print(tPsP.layout()); printf("\n"); } + // if (n_block == 0 && blockIdx.x == 0 && blockIdx.y == 0 && tidx < 64) { + // printf("tidx=%d, tPsP = 0x%p\n", tidx, tPsP.data()); + // } + Tensor tdSsdS = smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + auto smem_tiled_copy_PdSt = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); + Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); + Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); + + auto smem_tiled_copy_QdOt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); + Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); + Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); + + auto smem_tiled_copy_dS = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); + auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); + Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); + + auto smem_tiled_copy_Kt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); + auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); + Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); + + auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // + // PREDICATES + 
// + + Tensor cQ = make_identity_tensor(make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = make_identity_tensor(make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + Tensor tQcQ = gmem_thr_copy_QKV.partition_D(cQ); + Tensor tKVcKV = gmem_thr_copy_QKV.partition_D(cKV); + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { + #pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } + #pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } + } + + // Prologue + + // We'll advance gdQ and gdQaccum before the 1st read/write. + tdQgdQ.data() = tdQgdQ.data() + kBlockM * params.dq_row_stride; + tdQgdQaccum.data() = tdQgdQaccum.data() + kBlockM * params.h * params.d_rounded; + + int m_block = m_block_max - 1; + int m_block_min = (!Is_causal && !Is_local) + ? 0 + : std::max(0, (n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k - params.window_size_right) / kBlockM); + // If not local, we're guaranteed that m_block_min <= m_block: + // We checked earlier that n_block * kBlockN < actual_seqlen_k, so in the causal case, + // n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k < actual_seqlen_q. + // So m_block_min <= (actual_seqlen_q - 1) / kBlockM. + // Recall that m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM) = (actual_seqlen_q + kBlockM - 1) / kBlockM. + // So m_block_m - 1 = (actual_seqlen_q - 1) / kBlockM. + // We conclude that m_block_min <= m_block, so we will always have at least 1 iteration of the for loop. + // However, if local, then this possible to have some blocks of K & V not attending to any query. + // We might need to exit early and write 0 to dK and dV for those blocks. + // Otherwise we get wrong result for the case where we don't enter the for loop. + // And we might read OOB elements from gQ and gdO. 
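+    // (Worked example of the m_block_min bound, assuming causal masking with
+    // window_size_right == 0: with kBlockM = kBlockN = 64 and
+    // actual_seqlen_q = actual_seqlen_k = 256, n_block = 3 gives
+    // m_block_min = max(0, (192 + 256 - 256 - 0) / 64) = 3, while
+    // m_block_max = ceil(256 / 64) = 4, so only query block 3 attends to the
+    // last K/V block.)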
+ // This also covers the case where actual_seqlen_q == 0 + if ((Is_local || !Is_even_MN) && m_block < m_block_min) { + const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); + Tensor tdKrdK = make_tensor(shape(tdKgdK)); + Tensor tdVrdV = make_tensor(shape(tdVgdV)); + clear(tdKrdK); + clear(tdVrdV); + Tensor cdKV = make_identity_tensor(make_shape(size<0>(gdK), size<1>(gdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); + Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); + #pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + flash::copy( + gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + return; + } + + if (Double_buffer && m_block % 2 == 1) { // Double buffer for sQ + tQsQ.data() = tQsQ.data() + size(sQ); + tSsQ.data() = tSsQ.data() + size(sQ); + tdKsQt.data() = tdKsQt.data() + size(sQ); + } + + if ((!Is_first && !Seq_parallel) || params.deterministic) { __syncthreads(); } + + if (Kernel_traits::Is_V_in_regs) { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + flash::cp_async_fence(); + } + + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + if (!Is_first) { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + } else { + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + } + flash::copy( + gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + + Tensor caccS = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) + Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) + static_assert(decltype(size<0>(taccScS))::value == 4); + // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. 
+ Tensor taccScS_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); + Tensor lse = make_tensor(Shape>{}); + #pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccScS_row(mi)); + lse(mi) = Is_even_MN || row < binfo.actual_seqlen_q - m_block * kBlockM ? gLSE(row) : INFINITY; + } + // We want LSE = inf if the row is OOB. In that case Q would be zero, K would be zero, + // and scores would be zero. With LSE = 0, probs will be all 1's, and when we multiply + // with V (which would be zero), we're fine. However, with ALiBi, we might modify these + // scores, and probs can become NaN. Instead if we set LSE = inf for OOB rows, probs are always 0. + + // Tensor tKrK = make_fragment_like(tKsK); + // // cute::copy(gmem_tiled_copy_QKV, tKgK(_, _, _, 0), tKrK); + // cute::copy(gmem_tiled_copy_QKV, tKgK, tKrK); + // // if (cute::thread(1, 0)) { print(tKrK); } + + flash::copy( + gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + if (!Kernel_traits::Is_V_in_regs) { + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + } + flash::cp_async_fence(); + + // if (cute::thread0()) { print(tdOgdO.layout()); printf("\n"); print(tdOrdO); print(tdOrO); } + if (Is_first) { + cute::copy(tdOrdO, tdOsdO); + dot_do_o(tdOrdO, tdOrO, gdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); + } + + if (Kernel_traits::Is_V_in_regs) { + cute::cp_async_wait<1>(); + __syncthreads(); + Tensor tdPrV_copy_view = smem_thr_copy_KV.retile_D(tdPrV); + CUTE_STATIC_ASSERT_V(size<1>(tdPsV) == size<1>(tdPrV_copy_view)); // M + cute::copy(smem_tiled_copy_KV, tdPsV, tdPrV_copy_view); + } + + auto seed = params.rng_state[0]; + auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; + + clear(acc_dv); + clear(acc_dk); + + float alibi_slope = !Has_alibi ? 0.0f : reinterpret_cast(params.alibi_slopes_ptr)[bidb * params.alibi_slopes_batch_stride + bidh] / params.scale_softmax; + + for (; m_block >= m_block_min; --m_block) { + Tensor acc_s = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + clear(acc_s); + cute::cp_async_wait<0>(); + __syncthreads(); + + Tensor dP_sum = make_fragment_like(lse); + #pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { dP_sum(mi) = gdPsum(get<0>(taccScS_row(mi))); } + + // if (cute::thread0()) { print(sK); } + // Tensor tSrK_copy_view = smem_thr_copy_KV.retile_D(tSrK); + // #pragma unroll + // for (int k = 0; k < size<2>(tSrK_copy_view); ++k) { + // cute::copy(smem_tiled_copy_KV, tSsK(_, _, k), tSrK_copy_view(_, _, k)); + // } + // if (cute::thread0()) { print(tSrK); } + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); + + // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) + Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + // if (cute::thread(32, 0)) { print(scores); } + + if (Has_alibi) { + flash::apply_alibi( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, + m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, + AtomLayoutMS * 16, + alibi_slope + ); + } + + // TD [2023-07-29]: I was thinking that we don't need to mask out the elements beyond + // actual_seqlen_k, because acc_s would be some finite value for those indices. 
+ // In the end when we multiply with K to get dQ, the corresponding values of K would be 0, + // so the result would still be correct. + // However, it's possible that the values in acc_s are so large that they overflow + // when we multiply with dP and convert to fp16, resulting in Inf in dS and NaNs in dQ. + // So we need to mask out the elements beyond actual_seqlen_k. + if (!Is_causal && !Is_local) { + if (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k) { + flash::apply_mask(scores, binfo.actual_seqlen_k, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16); + } + } else if (Is_causal) { + // Putting this causal masking right after acc_s is *much* slower for some reason. + // TD [2023-08-16]: We need the 2nd condition because if seqlen_q is long and seqlen_k is short + // (e.g., 256 and 2), the 2nd block of seqlen_q (from 128 to 255), we're not doing causal masking. + // But we still want to mask out elements beyond actual_seqlen_k. + if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k + || (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { + flash::apply_mask_causal(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, + // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % AtomLayoutMS * 16 + (tidx % 32) / 4, + AtomLayoutMS * 16); + } + } else if (Is_local) { + if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k - params.window_size_right + || (m_block + 1) * kBlockM >= n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k + params.window_size_left + || (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { + flash::apply_mask_local(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, AtomLayoutMS * 16, + params.window_size_left, params.window_size_right); + } + + } + + // if (cute::thread(32, 0)) { print(scores); } + // Compute the exponential value. + flash::scale_apply_exp2(scores, lse, params.scale_softmax_log2); + if (Is_dropout) { + int warp_id = tidx / 32; + int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; + // Need col to be multiples of 32, since we're doing dropout with block of 16 x 32 + static_assert(MMA_N_SdP % 2 == 0); + int block_col_idx = n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); + Tensor scores_dropped = make_tensor(scores.data(), flash::convert_layout_rowcol_Aregs(scores.layout())); + flash::apply_dropout( + scores_dropped, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, AtomLayoutMS + ); + } + // Convert scores from fp32 to fp16/bf16 + Tensor rP = !Is_dropout + ? flash::convert_type(scores) + : flash::convert_type_relu(scores); + // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, MMA_N / 2) + // if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using m16n8k8. 
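+        // (Annotation: apply_dropout above encodes dropped entries in the
+        // sign of `scores`, and convert_type_relu zeroes those negative
+        // entries in rP so they do not contribute to dV; pointwise_mult below
+        // still reads the sign when it assembles dS. The reference form being
+        // computed is the softmax backward
+        //     dS_ij = P_ij * (dP_ij - sum_k P_ik * dP_ik),
+        // with dP_sum holding the per-row dot(dO, O).)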
+ Tensor tPrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs(rP.layout())); + Tensor tPaP = smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); + // if (cute::thread0()) { print(tPaP); } + // __syncthreads(); + // if (cute::thread0()) { print(sP); } + + Tensor acc_dp = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA + + clear(acc_dp); + // Tensor acc_dp_reshaped = make_tensor(acc_dp.data(), flash::convert_layout_acc_rowcol(acc_dp.layout())); + // #pragma unroll + // for (int mi = 0; mi < size<0>(acc_dp_reshaped); ++mi) { + // #pragma unroll + // for (int ni = 0; ni < size<1>(acc_dp_reshaped); ++ni) { + // acc_dp_reshaped(mi, ni) = -dP_sum(mi); + // } + // } + + // if (cute::thread0()) { print(dP_sum); } + + flash::gemm( + acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV + ); + + // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) + Tensor dS = make_tensor(acc_dp.data(), scores.layout()); + auto pointwise_mult = [](float p, float dp, float d) { + return p * (!Is_dropout || p >= 0 ? dp - d : d); + }; + #pragma unroll + for (int mi = 0; mi < size<0>(dS); ++mi) { + #pragma unroll + for (int ni = 0; ni < size<1>(dS); ++ni) { + dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); + } + } + // if (cute::thread0()) { print(dS); } + + Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K + tdQgdQaccum.data() = tdQgdQaccum.data() + (-int(kBlockM * params.h * params.d_rounded)); + if (Is_first || Seq_parallel) { + clear(acc_dq); + } else { + // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum + Tensor acc_dq_reshaped = make_tensor(acc_dq.data(), + make_layout(get<0>(acc_dq.layout()), + get<2>(acc_dq.layout()), + get<1>(acc_dq.layout()))); + cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, acc_dq_reshaped); + } + + if (Double_buffer && m_block > m_block_min) { + // Double buffer for sQ + const int sQ_offset = m_block % 2 == 0 ? 
size(sQ) : -size(sQ); + tQsQ.data() = tQsQ.data() + sQ_offset; + tSsQ.data() = tSsQ.data() + sQ_offset; + // Advance gQ + tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ); + flash::cp_async_fence(); + } + + Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); + // Convert dS from fp32 to fp16 + Tensor tdSrdS = flash::convert_type(dS_reshaped); + // if (cute::thread0()) { print(tPrP); } + Tensor tdSadS = smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); + __syncthreads(); + + // Layout p_l = tPrP.layout(); + // Tensor tdVrPt = make_tensor(tPrP.data(), make_layout(get<0>(p_l), get<2>(p_l), get<1>(p_l))); + // flash::gemm_A_in_regs(acc_dv, tdVrPt, tdVrdO, tdVsdOt, tiled_mma_dkv, smem_thr_copy_QdOt); + // Tensor tdKrdSt = make_tensor(tdSrdS.data(), tdVrPt.layout()); + // flash::gemm_A_in_regs(acc_dk, tdKrdSt, tdKrQt, tdKsQt, tiled_mma_dkv, smem_thr_copy_QdOt); + flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); + // if (cute::thread0() && n_block == 0 && m_block == 0) { print(tdVrPt); } + // if (cute::thread0()) { print(acc_dv); } + + __syncthreads(); // Need syncthreads since we're writing to the same sdO location + + if (m_block > m_block_min) { + // Advance gdO + tdOgdO.data() = tdOgdO.data() + (-int(kBlockM * params.do_row_stride)); + if (Is_first) { + tdOgO.data() = tdOgO.data() + (-int(kBlockM * params.o_row_stride)); + flash::copy(gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ); + flash::copy(gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ); + } else { + flash::copy(gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ); + flash::cp_async_fence(); + } + } + + flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, + smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, smem_thr_copy_Kt); + // if (cute::thread0()) { print(acc_dq); } + + if (m_block > m_block_min) { + gLSE.data() = gLSE.data() + (-int(kBlockM)); + #pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { lse(mi) = gLSE(get<0>(taccScS_row(mi))); } + gdPsum.data() = gdPsum.data() + (-int(kBlockM)); + } + + if (!Is_last) { + // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum + Tensor acc_dq_reshaped = make_tensor(acc_dq.data(), + make_layout(get<0>(acc_dq.layout()), + get<2>(acc_dq.layout()), + get<1>(acc_dq.layout()))); + if (!Seq_parallel) { + cute::copy(gmem_tiled_copy_dQaccum, acc_dq_reshaped, tdQgdQaccum); + } else { + // if (cute::thread0()) { print(acc_dq.layout()); printf("\n"); print(acc_dq_reshaped.layout()); printf("\n"); print(tdQgdQaccum.layout()); printf("\n"); } + CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); + #pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { atomicAdd(&tdQgdQaccum(i), acc_dq(i)); } + } + } else { + #pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); + } + + flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); + // if (cute::thread0()) { print(acc_dk); } + if (Double_buffer) { // Double buffer for sQ + tdKsQt.data() = 
tdKsQt.data() + (m_block % 2 == 0 ? size(sQ) : -size(sQ)); + } + if (!Double_buffer && m_block > m_block_min) { + __syncthreads(); + // Advance gQ + tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ); + flash::cp_async_fence(); + } + + if (Is_first && m_block > m_block_min) { + cute::copy(tdOrdO, tdOsdO); + dot_do_o(tdOrdO, tdOrO, gdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); + } + + if (Is_last) { + __syncthreads(); + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + tdQgdQ.data() = tdQgdQ.data() + (-int(kBlockM * params.dq_row_stride)); + Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); + #pragma unroll + for (int m = 0; m < size<1>(tdQgdQ); ++m) { + if (Is_even_MN || get<0>(tdQcdQ(0, m, 0)) < binfo.actual_seqlen_q - m_block * kBlockM) { + cute::copy(gmem_tiled_copy_dQ, tdQrdQ(_, m, _), tdQgdQ(_, m, _)); + } + } + } + + } + + // Epilogue + + if (Is_dropout) { + #pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { acc_dv(i) *= params.rp_dropout; } + } + #pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { acc_dk(i) *= params.scale_softmax_rp_dropout; } + + // Convert acc_dv from fp32 to fp16 + Tensor rdK = flash::convert_type(acc_dk); + Tensor rdV = flash::convert_type(acc_dv); + + Tensor sdK = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + Tensor sdV = make_tensor(sdK.data() + size(sdK), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + + // Partition sdV and sdK to match the accumulator partitioning + auto smem_tiled_copy_dKV = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); + auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor taccdKrdK = smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdKsdK = smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor taccdVrdV = smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdVsdV = smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // We need syncthreads here since we're writing to the same location as sK and sV. + // Without syncthreads, some thread might modify the location of sK while another thread + // is reading it for dQ gemm, leading to a race condition. + // If Is_last, there's already a __syncthreads() at the end of the loop. 
+ if (!Is_last) { __syncthreads(); } + + cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); + cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); + + const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + + typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor tdKsdK = gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVsdV = gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); + + __syncthreads(); + Tensor tdKrdK = make_tensor(shape(tdKgdK)); + cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); + Tensor tdVrdV = make_tensor(shape(tdVgdV)); + cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); + Tensor cdKV = make_identity_tensor(make_shape(size<0>(sdK), size<1>(sdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); + Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); + #pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + flash::copy( + gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_dq_dk_dv_1rowblock(const Params ¶ms, const int bidb, const int bidh, const int m_block) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + // constexpr int kNWarps = Kernel_traits::kNWarps; + constexpr int MMA_N_SdP = kBlockN / decltype(size<1>(typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; + constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) return; + + int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); + if (Is_causal) { + n_block_max = std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM, kBlockN)); + } + + // We iterate over the blocks in reverse order. This is because the last block is the only one + // that needs masking when we read K and V from global memory. 
Moreover, iterating in reverse + // might save us 1 register (we just need n_block instead of both n_block and n_block_max). + + const index_t row_offset_q = binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; + // We move K and V to the last block. + const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride; + const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; + const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + // We'll advance gdKaccum and gdVaccum before the first write. + const index_t row_offset_dkv_accum = ((bidb * params.h_k + (bidh / params.h_h_k_ratio)) * params.seqlen_k_rounded + + n_block_max * kBlockN) * params.d_rounded; + const index_t row_offset_lse = (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; + + // We assume that params.d == kHeadDim for now + Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor(make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + Tensor gV = make_tensor(make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, + Stride, _1>{}); + Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), + Shape, Int>{}, + Stride, _1>{}); + Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + row_offset_lse), + Shape>{}, Stride<_1>{}); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sQt = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sQtNoSwizzle = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sdO = make_tensor(sQ.data() + size(sQ), typename Kernel_traits::SmemLayoutQdO{}); + Tensor sdOt = make_tensor(sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sdOtransposedNoSwizzle = make_tensor(sdO.data(), + typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sK = make_tensor(sdO.data() + size(sdO), typename Kernel_traits::SmemLayoutKV{}); + // Double buffer for sK + Tensor sV = make_tensor(sK.data() + 2 * size(sK), typename Kernel_traits::SmemLayoutKV{}); + Tensor sKt = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); + Tensor 
sKtNoSwizzle = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); + Tensor sdS = make_tensor(sV.data() + size(sV), typename Kernel_traits::SmemLayoutPdS{}); + Tensor sdSt = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sdStNoSwizzle = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sP = make_tensor(sdS.data() + size(sdS), typename Kernel_traits::SmemLayoutPdS{}); + Tensor sPt = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sPtNoSwizzle = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sdPsum = make_tensor(make_smem_ptr(reinterpret_cast(sdS.data().get())), + Shape>{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); + + typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; + auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); + Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) + Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) + Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); + Tensor tdKrdSt = thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdKrQt = thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) + Tensor tdVrPt = thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdVrdO = thr_mma_dkv.partition_fragment_B(sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); + Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) + Tensor tdQrKt = thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) + + Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_M_SdP, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_QdO = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); + Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); + Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); + + auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN(typename Kernel_traits::SmemCopyAtom{}, 
tiled_mma_sdp); + auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_KV.partition_S(sK); + Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); + + // Partition sP and sdS to match the accumulator partitioning + // This has to be tiled_mma_sdp, not tiled_mma_dkv + auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); + auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); + Tensor tPsP = smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor tdSsdS = smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + auto smem_tiled_copy_PdSt = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); + Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); + Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); + + auto smem_tiled_copy_QdOt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); + Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); + Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); + + auto smem_tiled_copy_dS = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); + auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); + Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); + + auto smem_tiled_copy_Kt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); + auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); + Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); + + // + // PREDICATES + // + + // Construct identity layout for sQ and sK + Tensor cQ = make_identity_tensor(make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = make_identity_tensor(make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + // Repeat the partitioning with identity layouts + Tensor tQcQ = gmem_thr_copy_QKV.partition_S(cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tKVcKV = gmem_thr_copy_QKV.partition_S(cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { + #pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } + #pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } + } + + // Prologue + + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + + // TODO: Might need to exit early and write 0 to gdQ. 
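+    // A sketch of what that early exit might look like (illustrative only, not
+    // upstream code): zero dQ for this row block before returning, much like the
+    // forward kernel zero-fills gO and writes INFINITY to gLSE for row blocks
+    // past actual_seqlen_q, so no stale gradient is left in global memory:
+    //
+    //     if (binfo.actual_seqlen_k == 0) {
+    //         Tensor tdQrdQ = make_tensor<Element>(shape(tdQgdQ)); // hypothetical: reuses the
+    //         clear(tdQrdQ);                                       // dQ copy objects set up
+    //         cute::copy(gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ);      // in the epilogue below
+    //         return;
+    //     }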
+ + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + + Tensor tQrQ = make_fragment_like(tQgQ); + flash::copy( + gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM + ); + + int n_block = n_block_max - 1; + if (n_block % 2 == 1) { + tKsK.data() = tKsK.data() + size(sK); + tSsK.data() = tSsK.data() + size(sK); + tdQsKt.data() = tdQsKt.data() + size(sK); + } + + flash::copy( + gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN + ); + + Tensor caccS = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) + Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) + static_assert(decltype(size<0>(taccScS))::value == 4); + // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. + Tensor taccScS_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); + Tensor lse = make_tensor(Shape>{}); + #pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccScS_row(mi)); + lse(mi) = row < binfo.actual_seqlen_q - m_block * kBlockM ? gLSE(row) : 0; + } + + cute::cp_async_fence(); + + Tensor dP_sum = make_fragment_like(lse); + cute::copy(tdOrdO, tdOsdO); + dot_do_o( + tdOrdO, tdOrO, sdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout + ); + __syncthreads(); + #pragma unroll + for (int mi = 0; mi < size(dP_sum); ++mi) { dP_sum(mi) = sdPsum(get<0>(taccScS_row(mi))); } + + auto seed = params.rng_state[0]; + auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; + + clear(acc_dq); + + float alibi_slope = !Has_alibi ? 0.0f : reinterpret_cast(params.alibi_slopes_ptr)[bidb * params.alibi_slopes_batch_stride + bidh] / params.scale_softmax; + + for (; n_block >= 0; --n_block) { + Tensor acc_s = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_M_SdP, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); + + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); + + // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) + Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + + if (Has_alibi) { + flash::apply_alibi( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, + m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, + AtomLayoutMS * 16, + alibi_slope + ); + } + + // We don't need to mask out the elements beyond actual_seqlen_k, because acc_s would + // be some finite value for those indices. In the end when we multiply with K to get dQ, + // the corresponding values of K would be 0, so the result would still be correct. + if (Is_causal && m_block * kBlockM < (n_block + 1) * kBlockN) { + flash::apply_mask_causal(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % AtomLayoutMS * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, + AtomLayoutMS * 16); + } + + // Compute the exponential value. 
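+        // Expanded, the call below computes per element (with
+        // scale_softmax_log2 = scale_softmax * log2(e)):
+        //     P(i, j) = exp2(S(i, j) * scale_softmax_log2 - LSE(i) * log2(e))
+        //             = exp(S(i, j) * scale_softmax - LSE(i))
+        // i.e. the softmax probabilities are rebuilt from the raw scores and the
+        // log-sum-exp saved by the forward pass, so the full probability matrix
+        // never needs to be materialized in global memory.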
+ flash::scale_apply_exp2(scores, lse, params.scale_softmax_log2); + if (Is_dropout) { + int warp_id = tidx / 32; + int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; + // Need col to be multiples of 32, since we're doing dropout with block of 16 x 32 + static_assert(MMA_N_SdP % 2 == 0); + int block_col_idx = n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); + Tensor scores_dropped = make_tensor(scores.data(), flash::convert_layout_rowcol_Aregs(scores.layout())); + flash::apply_dropout( + scores_dropped, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, AtomLayoutMS + ); + } + // Convert scores from fp32 to fp16/bf16 + Tensor rP = !Is_dropout + ? flash::convert_type(scores) + : flash::convert_type_relu(scores); + // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, MMA_N / 2) + // if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using m16n8k8. + Tensor tPrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs(rP.layout())); + Tensor tPaP = smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); + + Tensor acc_dp = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA + + clear(acc_dp); + flash::gemm(acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); + + // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) + Tensor dS = make_tensor(acc_dp.data(), scores.layout()); + auto pointwise_mult = [](float p, float dp, float d) { + return p * (!Is_dropout || p >= 0 ? dp - d : d); + }; + #pragma unroll + for (int mi = 0; mi < size<0>(dS); ++mi) { + #pragma unroll + for (int ni = 0; ni < size<1>(dS); ++ni) { + dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); + } + } + + Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); + // Convert dS from fp32 to fp16 + Tensor tdSrdS = flash::convert_type(dS_reshaped); + Tensor tdSadS = smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); + __syncthreads(); + + if (n_block > 0) { + // Double buffer for sK + const int sK_offset = n_block % 2 == 0 ? size(sK) : -size(sK); + tKsK.data() = tKsK.data() + sK_offset; + tSsK.data() = tSsK.data() + sK_offset; + // Advance gK, gV + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV); + flash::copy(gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the synchronization + // isn't right and we get race conditions. 
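+            // cp_async_fence() commits the cp.async copies issued above as one
+            // group; the cp_async_wait<0>() at the top of the next iteration then
+            // waits for that group before sK/sV are read. Keeping the fence
+            // inside this branch ties each committed group to an iteration that
+            // actually issued a K/V prefetch.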
+ cute::cp_async_fence(); + } + + Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + clear(acc_dv); + flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(acc_dv); } + tdVgdVaccum.data() = tdVgdVaccum.data() + (-int(kBlockN * params.d_rounded)); + #pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { atomicAdd(&tdVgdVaccum(i), acc_dv(i)); } + + __syncthreads(); + Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + clear(acc_dk); + flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); + tdKgdKaccum.data() = tdKgdKaccum.data() + (-int(kBlockN * params.d_rounded)); + #pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { atomicAdd(&tdKgdKaccum(i), acc_dk(i)); } + + flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, + smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, smem_thr_copy_Kt); + // Double buffer for sK + tdQsKt.data() = tdQsKt.data() + (n_block % 2 == 0 ? size(sK) : -size(sK)); + + } + + // Epilogue + + #pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + + Tensor sdQ = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutdQ{}); + + // Partition sdV and sdK to match the accumulator partitioning + auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + __syncthreads(); + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); + + const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; + Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + + __syncthreads(); + + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + + Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); + Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); + if (!Is_even_K) { + #pragma unroll + for (int k = 0; k < size(tdQpdQ); ++k) { tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; } + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, tdQpdQ, binfo.actual_seqlen_q - m_block * kBlockM + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_dq_dk_dv(const Params ¶ms) { + + // The block index for the batch. 
+ const int bidb = blockIdx.x; + // const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.y; + // const int bidh = blockIdx.z; + // The thread index. + const int tidx = threadIdx.x; + + const int n_block_max = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + if (n_block_max == 1) { + compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); + } else { + // Iterating backward from n_block_max - 1 to 0 might save 1 register + compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block_max - 1); + for (int n_block = n_block_max - 2; n_block > 0; n_block--) { + compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block); + } + compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_dq_dk_dv_seqk_parallel(const Params ¶ms) { + + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + + // If deterministic, each thread block will do atomicAdd to a different dQ_accum buffer. + for (int n_block = blockIdx.x; n_block < (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; n_block += gridDim.x) { + compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_dq_dk_dv_seqq_parallel(const Params ¶ms) { + + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + + compute_dq_dk_dv_1rowblock(params, bidb, bidh, m_block); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +} // namespace flash diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h b/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h new file mode 100644 index 000000000..a72664f32 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h @@ -0,0 +1,364 @@ +// Copyright (c) 2023, Tri Dao. 
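+//
+// Launch templates for the FlashAttention-2 backward pass. Each run_mha_bwd_hdim*
+// function below queries the device's opt-in shared-memory limit, picks a tile
+// configuration (kBlockM x kBlockN, warp layout, double buffering) that fits,
+// and dispatches through BOOL_SWITCH, which turns a runtime bool into a
+// compile-time template parameter by instantiating both branches:
+//
+//     BOOL_SWITCH(params.is_causal, Is_causal, [&] {
+//         // in here, Is_causal is a constexpr bool usable as a template argument
+//     });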
+ +#pragma once + +// #include + +#include "static_switch.h" +#include "flash.h" +#include "flash_bwd_kernel.h" + +template +__global__ void flash_bwd_dot_do_o_kernel(Flash_bwd_params params) { + flash::compute_dot_do_o(params); +} + +template +__global__ void flash_bwd_clear_dkvaccum_kernel(Flash_bwd_params params) { + flash::clear_dKVaccum(params); +} + +template +__global__ void flash_bwd_dq_dk_dv_loop_kernel(Flash_bwd_params params) { + flash::compute_dq_dk_dv(params); +} + +template +__global__ void flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel(Flash_bwd_params params) { + static_assert(!(Is_causal && Is_local)); // If Is_local is true, Is_causal should be false + flash::compute_dq_dk_dv_seqk_parallel(params); +} + +template +__global__ void flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel(Flash_bwd_params params) { + flash::compute_dq_dk_dv_seqq_parallel(params); +} + +template +__global__ void flash_bwd_convert_dq_kernel(Flash_bwd_params params, const int nsplits) { + flash::convert_dQ(params, nsplits); +} + +template +__global__ void flash_bwd_convert_dkv_kernel(Flash_bwd_params params) { + flash::convert_dKV(params); +} + +template +void run_flash_bwd_seqk_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid_m(num_m_block, params.b, params.h); + const int num_n_block = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + int gridDimx = num_n_block; + if (params.deterministic) { + // auto dprops = at::cuda::getCurrentDeviceProperties(); + cudaDeviceProp dprops; + cudaGetDeviceProperties(&dprops, 0); + gridDimx = (dprops.multiProcessorCount + params.b * params.h - 1) / (params.b * params.h); + } + dim3 grid_n(gridDimx, params.b, params.h); + + if (!params.deterministic) { + flash_bwd_dot_do_o_kernel<<>>(params); + } else { + flash_bwd_dot_do_o_kernel<<>>(params); + } + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + + // We want to specialize to is_even_MN and not just is_even_M, since in the case where N is not + // a multiple of kBlockN, we'll need to apply mask in the loop. + const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_q % Kernel_traits::kBlockM == 0 && params.seqlen_k % Kernel_traits::kBlockN == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1colblock; + // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !params.is_causal, Is_local, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. 
+ // If head dim > 128, set IsEvenMNConst to false to reduce number of templates + // If Is_local, set Is_causal to false + auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; + // auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; + if (smem_size_dq_dk_dv >= 48 * 1024) { + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + }); + }); + }); + }); + + auto kernel_dq = &flash_bwd_convert_dq_kernel; + if (Kernel_traits::kSmemdQSize >= 48 * 1024) { + cudaFuncSetAttribute( + kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemdQSize); + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemdQSize)); + } + kernel_dq<<>>(params, !params.deterministic ? 1 : gridDimx); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); +} + +template +void run_flash_bwd_seqq_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + const int num_n_block = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + dim3 grid_n(num_n_block, params.b, params.h_k); + flash_bwd_clear_dkvaccum_kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + + const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid_m(num_m_block, params.b, params.h); + // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we need to check + // for cu_seqlens_k as well. + const bool is_even_N = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1rowblock; + // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_N, IsEvenNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. 
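+          // Statically allocated shared memory is capped at 48 KB; kernels whose
+          // tile footprint exceeds that (the smem_size_dq_dk_dv >= 48 * 1024
+          // checks in these launchers) must opt in once via
+          // cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, bytes)
+          // before launching with that many bytes of dynamic smem in
+          // <<<grid, block, bytes, stream>>>.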
+ auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; + // auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; + if (smem_size_dq_dk_dv >= 48 * 1024) { + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv); + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv)); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + }); + }); + }); + + auto kernel_dkv = &flash_bwd_convert_dkv_kernel; + if (Kernel_traits::kSmemKVSize >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemKVSize)); + cudaFuncSetAttribute( + kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemKVSize); + } + kernel_dkv<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); +} + +template +void run_flash_bwd(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + if (configure) return; + run_flash_bwd_seqk_parallel(params, stream, configure); +} + +template +void run_mha_bwd_hdim32(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 32; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 2 * ((3 * 128 + 2 * 128) * Headdim + 2 * 128 * 128)) { // 104 KB + if constexpr(!Is_dropout) { // We can afford more registers to keep V in registers + run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + run_flash_bwd, Is_dropout>(params, stream, configure); + } + } else { // 96 KB + run_flash_bwd, Is_dropout>(params, stream, configure); + } + }); +} + +template +void run_mha_bwd_hdim64(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 64; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + // Changing AtomLayoutMdQ from 2 to 4 takes the same time + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // This is slightly faster. We want to split M more so we need fewer registers to store LSE. 
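+        // The smem thresholds in these launchers come from the tile footprint.
+        // Worked example from run_mha_bwd_hdim32 above: the Q/dO and K/V staging
+        // buffers ((3*128 + 2*128) rows of Headdim elements) plus the 128 x 128
+        // P and dS buffers, at 2 bytes per fp16/bf16 element, give
+        //     2 * ((3*128 + 2*128) * 32 + 2*128*128) = 106496 bytes,
+        // i.e. the 104 KB checked there before the double-buffered config is chosen.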
+ if (max_smem_per_block >= 144 * 1024) { + run_flash_bwd, Is_dropout>(params, stream, configure); + // This has a lot of register spilling + // run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + // if (params.h == params.h_k) { + // run_flash_bwd, Is_dropout>(params, stream, configure); + run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // } else { + // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure); + // } + } + }); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // M=128, N=64 is quite slow, I think because we need to read/write dQaccum twice as many times + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + + // run_flash_bwd>(params, stream, configure); +} + +template +void run_mha_bwd_hdim96(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 96; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + // if (params.h == params.h_k) { + if (max_smem_per_block >= 116 * 1024) { + if constexpr(!Is_dropout) { // 92KB + run_flash_bwd, Is_dropout>(params, stream, configure); + } else { // 116 KB + // This is faster for dropout since we don't have many registers to spare + run_flash_bwd, Is_dropout>(params, stream, configure); + } + } else { + run_flash_bwd, Is_dropout>(params, stream, configure); + } + // } else { + // run_flash_bwd_seqq_parallel>(params, stream, configure); + // } + }); +} + +template +void run_mha_bwd_hdim128(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 128; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + // if (params.h == params.h_k) { + // run_flash_bwd>(params, stream, configure); + // This is faster, in the case of sequence-parallel bwd (where we need fewer registers). + // Out of these three, the 2nd one is slightly faster (2% faster than the first). Idk why. 
+ // run_flash_bwd>(params, stream, configure); + if (max_smem_per_block >= 144 * 1024) { + run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + // run_flash_bwd, Is_dropout>(params, stream, configure); + run_flash_bwd, Is_dropout>(params, stream, configure); + } + // run_flash_bwd>(params, stream, configure); + + // run_flash_bwd>(params, stream, configure); + // } else { + // run_flash_bwd_seqq_parallel>(params, stream, configure); + // } + }); +} + +template +void run_mha_bwd_hdim160(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 160; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 116 * 1024) { + run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + run_flash_bwd, Is_dropout>(params, stream, configure); + } + }); +} + +template +void run_mha_bwd_hdim192(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 192; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 136 * 1024) { + run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + run_flash_bwd, Is_dropout>(params, stream, configure); + } + }); +} + +template +void run_mha_bwd_hdim224(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 224; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + run_flash_bwd, Is_dropout>(params, stream, configure); + }); +} + +template +void run_mha_bwd_hdim256(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { + constexpr static int Headdim = 256; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 176 * 1024) { // H100 + run_flash_bwd, Is_dropout>(params, stream, configure); + } else { // A100, we don't do double buffering to save smem + run_flash_bwd, Is_dropout>(params, stream, configure); + } + }); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_bf16_sm80.cu new file mode 100644 index 000000000..6ffa4126e --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_bf16_sm80.cu @@ -0,0 +1,10 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. 
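+// Each of these per-head-dim .cu files (this one and the ones that follow) holds
+// a single explicit specialization of run_mha_fwd_<element_type, head_dim>, so
+// nvcc builds the instantiations as independent translation units in parallel
+// instead of one giant compile.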
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 128>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim128<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_fp16_sm80.cu
new file mode 100644
index 000000000..19b005ad9
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim128_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 128>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim128<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_bf16_sm80.cu
new file mode 100644
index 000000000..f674f4818
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 160>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim160<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_fp16_sm80.cu
new file mode 100644
index 000000000..afd0a8a38
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim160_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 160>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim160<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_bf16_sm80.cu
new file mode 100644
index 000000000..aa91bdd66
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 192>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim192<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_fp16_sm80.cu
new file mode 100644
index 000000000..37a965264
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim192_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 192>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim192<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_bf16_sm80.cu
new file mode 100644
index 000000000..167a0df2b
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 224>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim224<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_fp16_sm80.cu
new file mode 100644
index 000000000..58ffe75c3
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim224_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 224>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim224<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_bf16_sm80.cu
new file mode 100644
index 000000000..1b3701415
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 256>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim256<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_fp16_sm80.cu
new file mode 100644
index 000000000..9f35129c3
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim256_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 256>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim256<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_bf16_sm80.cu
new file mode 100644
index 000000000..770de6fcf
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 32>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim32<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_fp16_sm80.cu
new file mode 100644
index 000000000..8dbf8b94a
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim32_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 32>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim32<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_bf16_sm80.cu
new file mode 100644
index 000000000..22eac8789
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 64>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim64<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_fp16_sm80.cu
new file mode 100644
index 000000000..e6da5dd2d
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim64_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::half_t, 64>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim64<cutlass::half_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_bf16_sm80.cu
new file mode 100644
index 000000000..9c003540c
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_bf16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated. See "generate_kernels.py"
+
+#include "flash_fwd_launch_template.h"
+
+template<>
+void run_mha_fwd_<cutlass::bfloat16_t, 96>(Flash_fwd_params &params, cudaStream_t stream) {
+    run_mha_fwd_hdim96<cutlass::bfloat16_t>(params, stream);
+}
diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_fp16_sm80.cu
new file mode 100644
index 000000000..8108696a0
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/flash_fwd_hdim96_fp16_sm80.cu
@@ -0,0 +1,10 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+// This file is auto-generated.
See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template<> +void run_mha_fwd_(Flash_fwd_params ¶ms, cudaStream_t stream) { + run_mha_fwd_hdim96(params, stream); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_kernel.h b/external_libs/runtime/flash_attn/lib/flash_fwd_kernel.h new file mode 100644 index 000000000..7ddc07caf --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_kernel.h @@ -0,0 +1,1768 @@ +/****************************************************************************** + * Copyright (c) 2023, Tri Dao. + ******************************************************************************/ + +#pragma once + +#include + +#include +#include +#include + +#include "block_info.h" +#include "kernel_traits.h" +#include "softmax.h" +#include "utils.h" + +#include "alibi.h" + +namespace flash { + +using namespace cute; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void softmax_rescale_o(Tensor0 &scores, Tensor1 &scores_max, + Tensor1 &scores_sum, Tensor2 &acc_o, + float softmax_scale_log2) { + if (Is_first) { + flash::template reduce_max(scores, scores_max); + flash::scale_apply_exp2(scores, scores_max, softmax_scale_log2); + flash::reduce_sum(scores, scores_sum); + } else { + Tensor scores_max_prev = make_fragment_like(scores_max); + cute::copy(scores_max, scores_max_prev); + flash::template reduce_max(scores, scores_max); + // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, + // MMA_K)) + Tensor acc_o_rowcol = make_tensor( + acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout())); +#pragma unroll + for (int mi = 0; mi < size(scores_max); ++mi) { + float scores_max_cur = + !Check_inf ? scores_max(mi) + : (scores_max(mi) == -INFINITY ? 0.0f : scores_max(mi)); + float scores_scale = + exp2f((scores_max_prev(mi) - scores_max_cur) * softmax_scale_log2); + scores_sum(mi) *= scores_scale; +#pragma unroll + for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { + acc_o_rowcol(mi, ni) *= scores_scale; + } + } + flash::scale_apply_exp2(scores, scores_max, softmax_scale_log2); + Tensor scores_sum_cur = make_fragment_like(scores_sum); + flash::reduce_sum(scores, scores_sum_cur); +#pragma unroll + for (int mi = 0; mi < size(scores_sum); ++mi) { + scores_sum(mi) += scores_sum_cur(mi); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void +write_softmax_to_gmem(Tensor const &tOrP, + Tensor &tPgP, + TiledCopy gmem_tiled_copy_P) { + // Reshape tOrP from (8, MMA_M, MMA_N) to (8, MMA_M * MMA_N) + Layout l = tOrP.layout(); + Tensor tPrP = make_tensor( + tOrP.data(), make_layout(get<0>(l), make_layout(get<1>(l), get<2>(l)))); + CUTE_STATIC_ASSERT_V(size<2>(tPgP) == _1{}); + CUTE_STATIC_ASSERT_V(size<1>(tPrP) == size<1>(tPgP)); +#pragma unroll + for (int mi = 0; mi < size<1>(tPrP); ++mi) { + cute::copy(gmem_tiled_copy_P, tPrP(_, mi), tPgP(_, mi, 0)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_attn_1rowblock(const Params ¶ms, + const int bidb, const int bidh, + const int m_block) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. 
+ const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + constexpr int kNWarps = Kernel_traits::kNWarps; + constexpr int MMA_M = + kBlockM / decltype(size<0>( + typename Kernel_traits::TiledMma::TiledShape_MNK{}))::value; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q) + return; + + const int n_block_min = + !Is_local + ? 0 + : std::max(0, (m_block * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q - params.window_size_left) / + kBlockN); + int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); + if (Is_causal || Is_local) { + n_block_max = std::min( + n_block_max, + cute::ceil_div((m_block + 1) * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q + params.window_size_right, + kBlockN)); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { + // printf("m_block = %d, n_block_max = %d\n", m_block, n_block_max); + // } + } + // We exit early and write 0 to gO and gLSE. This also covers the case where + // actual_seqlen_k == 0. Otherwise we might read OOB elements from gK and gV. + if ((Is_causal || Is_local || !Is_even_MN) && n_block_max <= n_block_min) { + // Save seed and offset for backward. If we don't have this here, the 0-th + // thread block might exit early and no one saves the rng state. + // if (Is_dropout && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 + // && + // tidx == 0) { + // auto seeds = at::cuda::philox::unpack(params.philox_args); + // params.rng_state[0] = std::get<0>(seeds); + // params.rng_state[1] = std::get<1>(seeds); + // } + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_lse = + (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; + Tensor gO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gLSE = make_tensor( + make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + + row_offset_lse), + Shape>{}, Stride<_1>{}); + + typename Kernel_traits::GmemTiledCopyO gmem_tiled_copy_O; + auto gmem_thr_copy_O = gmem_tiled_copy_O.get_thread_slice(tidx); + Tensor tOgO = gmem_thr_copy_O.partition_D(gO); + Tensor tOrO = make_tensor(shape(tOgO)); + clear(tOrO); + // Construct identity layout for sO + Tensor cO = make_identity_tensor( + make_shape(size<0>(gO), size<1>(gO))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + // Repeat the partitioning with identity layouts + Tensor tOcO = gmem_thr_copy_O.partition_D(cO); + Tensor tOpO = make_tensor(make_shape(size<2>(tOgO))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tOpO); ++k) { + tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d; + } + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_O, tOrO, tOgO, tOcO, tOpO, + binfo.actual_seqlen_q - m_block * kBlockM); +#pragma unroll + for (int m = 0; m < size<1>(tOgO); ++m) { + const int row = get<0>(tOcO(0, m, 0)); + if (row < binfo.actual_seqlen_q - m_block * kBlockM && + get<1>(tOcO(0, m, 0)) == 0) { + gLSE(row) = INFINITY; + } + } + return; + } + // if (tidx == 0) { printf("m_block = %d, n_block_min = %d, n_block_max = + // %d\n", m_block, n_block_min, n_block_max); } + + // We iterate over the blocks in reverse order. 
This is because the last block + // is the only one that needs masking when we read K and V from global memory. + // Moreover, iterating in reverse might save us 1 register (we just need + // n_block instead of both n_block and n_block_max). + + const index_t row_offset_q = + binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; + // We move K and V to the last block. + const index_t row_offset_k = + binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.k_row_stride + + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = + binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.v_row_stride + + (bidh / params.h_h_k_ratio) * params.v_head_stride; + const index_t row_offset_p = + ((bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM) * + params.seqlen_k_rounded + + (n_block_max - 1) * kBlockN; + + Tensor gQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + Tensor gV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + Tensor gP = make_tensor( + make_gmem_ptr(reinterpret_cast(params.p_ptr) + row_offset_p), + Shape, Int>{}, + make_stride(params.seqlen_k_rounded, _1{})); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQ{}); + // Careful we're using the same smem for sQ and sK | sV if Share_Q_K_smem; + Tensor sK = + make_tensor(sQ.data() + (Kernel_traits::Share_Q_K_smem ? 
0 : size(sQ)), + typename Kernel_traits::SmemLayoutKV{}); + Tensor sV = + make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); + Tensor sVt = + make_tensor(sV.data(), typename Kernel_traits::SmemLayoutVtransposed{}); + Tensor sVtNoSwizzle = make_tensor( + sV.data(), typename Kernel_traits::SmemLayoutVtransposedNoSwizzle{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopyP gmem_tiled_copy_P; + auto gmem_thr_copy_P = gmem_tiled_copy_P.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + Tensor tPgP = gmem_thr_copy_P.partition_D(gP); + + typename Kernel_traits::TiledMma tiled_mma; + auto thr_mma = tiled_mma.get_thread_slice(tidx); + Tensor tSrQ = thr_mma.partition_fragment_A(sQ); // (MMA,MMA_M,MMA_K) + Tensor tSrK = thr_mma.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tOrVt = + thr_mma.partition_fragment_B(sVtNoSwizzle); // (MMA, MMA_K,MMA_N) + + Tensor acc_o = partition_fragment_C( + tiled_mma, Shape, Int>{}); // MMA, MMA_M, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_Q = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); + auto smem_thr_copy_Q = smem_tiled_copy_Q.get_thread_slice(tidx); + // if (cute::thread0()) {smem_thr_copy_Q.print_all();} + Tensor tSsQ = smem_thr_copy_Q.partition_S(sQ); + // if (cute::thread0()) {print(tSsQ.layout()); printf("\n");} + + auto smem_tiled_copy_K = + make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); + auto smem_thr_copy_K = smem_tiled_copy_K.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_K.partition_S(sK); + + auto smem_tiled_copy_V = make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma); + auto smem_thr_copy_V = smem_tiled_copy_V.get_thread_slice(tidx); + Tensor tOsVt = smem_thr_copy_V.partition_S(sVt); + + // TODO: this might need to change if we change the mma instruction in SM70 + Tensor scores_max = + make_tensor(Shape(acc_o)>>{}); + Tensor scores_sum = make_fragment_like(scores_max); + + // + // PREDICATES + // + + // // Allocate predicate tensors for m and n + // Tensor tQpQ = make_tensor(make_shape(size<1>(tQsQ), size<2>(tQsQ)), + // Stride<_1,_0>{}); Tensor tKVpKV = + // make_tensor(make_shape(size<1>(tKsK), size<2>(tKsK)), + // Stride<_1,_0>{}); + + // Construct identity layout for sQ and sK + Tensor cQ = make_identity_tensor( + make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = make_identity_tensor( + make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + // Tensor tScQ = thr_mma.partition_A(cQ); // + // (MMA,MMA_M,MMA_K) if (cute::thread0()) { + // print(tScQ.layout()); printf("\n"); + // for (int i = 0; i < size(tScQ); ++i) { + // printf("%d ", get<0>(tScQ(i))); + // } + // printf("\n"); + // for (int i = 0; i < size(tScQ); ++i) { + // printf("%d ", get<1>(tScQ(i))); + // } + // printf("\n"); + // } + + // Repeat the partitioning with identity layouts + Tensor tQcQ = gmem_thr_copy_QKV.partition_S( + cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tKVcKV = gmem_thr_copy_QKV.partition_S( + cKV); // 
(BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { + tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; + } +#pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { + tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; + } + } + + // Prologue + + Tensor tQrQ = make_fragment_like(tQgQ); + // We don't need to clear the sQ smem tiles since we'll only write out the + // valid outputs + flash::copy(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, + tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + if (Kernel_traits::Is_Q_in_regs) { + cute::cp_async_fence(); + } + + // // Copy rmem to smem + // // copy(tQrQ, tQsQ); + // flash::cp_async_wait<0>(); + // __syncthreads(); + // // if (cute::thread(1, 0)) { print(tQsQ); } + // // Tensor sQNoSwizzle = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), typename Kernel_traits::SmemLayoutQNoSwizzle{}); + // // if (cute::thread0()) { print(sQNoSwizzle); } + + if (Kernel_traits::Share_Q_K_smem) { + flash::cp_async_wait<0>(); + __syncthreads(); + Tensor tSrQ_copy_view = smem_thr_copy_Q.retile_D(tSrQ); + CUTE_STATIC_ASSERT_V(size<1>(tSsQ) == size<1>(tSrQ_copy_view)); // M + cute::copy(smem_tiled_copy_Q, tSsQ, tSrQ_copy_view); + __syncthreads(); + } + + int n_block = n_block_max - 1; + // We don't need to clear the sK smem tiles since we'll mask out the scores + // anyway. + flash::copy(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, + tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + cute::cp_async_fence(); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z < 2) { print(tKgK); } + // __syncthreads(); + + if (Kernel_traits::Is_Q_in_regs && !Kernel_traits::Share_Q_K_smem) { + flash::cp_async_wait<1>(); + __syncthreads(); + Tensor tSrQ_copy_view = smem_thr_copy_Q.retile_D(tSrQ); + CUTE_STATIC_ASSERT_V(size<1>(tSsQ) == size<1>(tSrQ_copy_view)); // M + cute::copy(smem_tiled_copy_Q, tSsQ, tSrQ_copy_view); + } + + // auto seeds = at::cuda::philox::unpack(params.philox_args); + // unsigned long long seed = std::get<0>(seeds); + // unsigned long long offset = + // std::get<1>(seeds) + (bidb * params.h + bidh) * 32 + tidx % 32; + + unsigned long long seed = params.rng_state[0]; + unsigned long long offset = + params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; + // Save seed and offset for backward. + // if (Is_dropout && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 && + // tidx == 0) { + // params.rng_state[0] = seed; + // params.rng_state[1] = std::get<1>(seeds); + // } + + clear(acc_o); + + float alibi_slope = + !Has_alibi ? 0.0f + : reinterpret_cast(params.alibi_slopes_ptr) + [bidb * params.alibi_slopes_batch_stride + bidh] / + params.scale_softmax; + + // For performance reason, we separate out two kinds of iterations: + // those that need masking on S, and those that don't. + // We need masking on S for the very last block when K and V has length not + // multiple of kBlockN. We also need masking on S if it's causal, for the last + // ceil_div(kBlockM, kBlockN) blocks. We will have at least 1 "masking" + // iteration. + + // If not even_N, then seqlen_k might end in the middle of a block. In that + // case we need to mask 2 blocks (e.g. when kBlockM == kBlockN), not just 1. + constexpr int n_masking_steps = + (!Is_causal && !Is_local) + ? 1 + : ((Is_even_MN && Is_causal) ? 
cute::ceil_div(kBlockM, kBlockN) + : cute::ceil_div(kBlockM, kBlockN) + 1); +#pragma unroll + for (int masking_step = 0; masking_step < n_masking_steps; + ++masking_step, --n_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma, + Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); + + // Advance gV + if (masking_step > 0) { + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tVgV, + tVsV, tKVcKV, tKVpKV); + } else { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + } + cute::cp_async_fence(); + + flash::gemm( + acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, + smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); + // if (cute::thread0()) { print(acc_s); } + + // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + // if (cute::thread0()) { print_tensor(scores); } + // We don't put the masking before the matmul S = Q K^T because we don't + // clear sK for rows outside actual_seqlen_k. So those rows could have Inf / + // NaN, and the matmul can produce Inf / NaN. + + if (Has_alibi) { + flash::apply_alibi( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, alibi_slope); + } + + if (!Is_causal && !Is_local) { + if (!Is_even_MN) { + flash::apply_mask(scores, binfo.actual_seqlen_k - n_block * kBlockN); + } + } else { + // Tensor caccS = make_identity_tensor(Shape, + // Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) Tensor taccScS = + // thr_mma.partition_C(caccS); // + // (MMA,MMA_M,MMA_N) static_assert(decltype(size<0>(taccScS))::value == + // 4); + // // Convert to ((2, 2), MMA_M, MMA_N) then take only the row indices. + // Tensor idx_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), + // _, 0); Tensor idx_rowcol = make_tensor(taccScS.data(), + // flash::convert_layout_acc_rowcol(taccScS.layout())); + // flash::apply_mask_causal_w_idx(scores, idx_rowcol, n_block * kBlockN, + // binfo.actual_seqlen_k, + // m_block * kBlockM); + // Idk why it's get<1> and not get<0> of the stride. + // if (cute::thread0()) { print(idx_row.layout()); + // print(stride<1>(idx_row)); printf("stride = %d \n", + // get<1>(stride<1>(idx_row))); } I can't get the stride from idx_row + flash::apply_mask_local( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + // m_block * kBlockM + get<0>(idx_row(0)), + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, params.window_size_left, + params.window_size_right + // m_block * kBlockM + (tidx / 32) * 16, kNWarps * 16 + // m_block * kBlockM + (tidx / 32) * (kBlockM / kNWarps), 16 + ); + // if (cute::thread0()) { print_tensor(scores); } + } + + flash::cp_async_wait<0>(); + __syncthreads(); + if (n_block > n_block_min) { + // Advance gK + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, + tKsK, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the + // synchronization isn't right and we get race conditions. + cute::cp_async_fence(); + } + + // TODO: when we have key_padding_mask we'll need to Check_inf + masking_step == 0 + ? 
softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2) + : softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2); + + // Convert scores from fp32 to fp16/bf16 + Tensor rP = flash::convert_type(scores); + // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using + // m16n8k8. + Tensor tOrP = make_tensor( + rP.data(), flash::convert_layout_rowcol_Aregs( + rP.layout())); + int block_row_idx = m_block * (kBlockM / 16) + tidx / 32; + int block_col_idx = n_block * (kBlockN / 32); + if (Return_softmax) { + Tensor tOrP_copy = make_fragment_like(tOrP); + cute::copy(tOrP, tOrP_copy); + flash::apply_dropout( + tOrP_copy, params.p_dropout_in_uint8_t, seed, offset, block_row_idx, + block_col_idx, kNWarps); + flash::write_softmax_to_gmem(tOrP_copy, tPgP, gmem_tiled_copy_P); + tPgP.data() = tPgP.data() + (-kBlockN); + } + if (Is_dropout) { + flash::apply_dropout(tOrP, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, kNWarps); + } + // if (cute::thread0()) { print(tOrP); } + + flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, + smem_tiled_copy_V, smem_thr_copy_V); + // if (cute::thread0()) { print(scores); } + + // This check is at the end of the loop since we always have at least 1 + // iteration + if (n_masking_steps > 1 && n_block <= n_block_min) { + --n_block; + break; + } + } + + // These are the iterations where we don't need masking on S + for (; n_block >= n_block_min; --n_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma, + Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); + // Advance gV + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tVgV, tVsV, + tKVcKV, tKVpKV); + cute::cp_async_fence(); + + flash::gemm( + acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, + smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); + + flash::cp_async_wait<0>(); + __syncthreads(); + if (n_block > n_block_min) { + // Advance gK + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, + tKsK, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the + // synchronization isn't right and we get race conditions. + cute::cp_async_fence(); + } + + // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + + if (Has_alibi) { + flash::apply_alibi( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, alibi_slope); + } + + if (Is_local && n_block * kBlockN < + (m_block + 1) * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q + params.window_size_right) { + flash::apply_mask_local( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, params.window_size_left, + params.window_size_right); + } + + softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2); + + Tensor rP = flash::convert_type(scores); + // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using + // m16n8k8. 
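+    // A note on what this reshape buys us (sketch, shapes illustrative): the
+    // row-major view keeps each thread's 2 rows x 2 cols per MMA tile, and
+    // pairing adjacent MMA_N tiles produces the ((2, 2, 2), MMA_M, MMA_N / 2)
+    // A-operand register layout the second GEMM expects. Only the layout
+    // (strides) changes; rP.data() is reused with no data movement.
+    // Uncomment to inspect the layout before the reshape:
+    // if (cute::thread0()) { print(rP.layout()); }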
+ Tensor tOrP = make_tensor( + rP.data(), flash::convert_layout_rowcol_Aregs( + rP.layout())); + int block_row_idx = m_block * (kBlockM / 16) + tidx / 32; + int block_col_idx = n_block * (kBlockN / 32); + if (Return_softmax) { + Tensor tOrP_copy = make_fragment_like(tOrP); + cute::copy(tOrP, tOrP_copy); + flash::apply_dropout( + tOrP_copy, params.p_dropout_in_uint8_t, seed, offset, block_row_idx, + block_col_idx, kNWarps); + flash::write_softmax_to_gmem(tOrP_copy, tPgP, gmem_tiled_copy_P); + tPgP.data() = tPgP.data() + (-kBlockN); + } + if (Is_dropout) { + flash::apply_dropout(tOrP, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, kNWarps); + } + + flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, + smem_tiled_copy_V, smem_thr_copy_V); + } + + // Epilogue + + // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, + // MMA_K)) + Tensor acc_o_rowcol = make_tensor( + acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout())); + Tensor lse = make_fragment_like(scores_sum); +#pragma unroll + for (int mi = 0; mi < size<0>(acc_o_rowcol); ++mi) { + float sum = scores_sum(mi); + float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum; + lse(mi) = (sum == 0.f || sum != sum) + ? INFINITY + : scores_max(mi) * params.scale_softmax + __logf(sum); + float scale = !Is_dropout ? inv_sum : inv_sum * params.rp_dropout; +#pragma unroll + for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { + acc_o_rowcol(mi, ni) *= scale; + } + } + + // if (cute::thread0()) { print(acc_o_rowcol); } + + // Convert acc_o from fp32 to fp16/bf16 + Tensor rO = flash::convert_type(acc_o); + Tensor sO = make_tensor( + sQ.data(), typename Kernel_traits::SmemLayoutO{}); // (SMEM_M,SMEM_N) + // Partition sO to match the accumulator partitioning + auto smem_tiled_copy_O = + make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomO{}, tiled_mma); + auto smem_thr_copy_O = smem_tiled_copy_O.get_thread_slice(tidx); + Tensor taccOrO = + smem_thr_copy_O.retile_S(rO); // ((Atom,AtomNum), MMA_M, MMA_N) + Tensor taccOsO = + smem_thr_copy_O.partition_D(sO); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // sO has the same size as sQ, so we don't need to sync here. + if (Kernel_traits::Share_Q_K_smem) { + __syncthreads(); + } + + cute::copy(smem_tiled_copy_O, taccOrO, taccOsO); + + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_lse = + (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; + Tensor gO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gLSE = make_tensor( + make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + + row_offset_lse), + Shape>{}, Stride<_1>{}); + + typename Kernel_traits::GmemTiledCopyO gmem_tiled_copy_O; + auto gmem_thr_copy_O = gmem_tiled_copy_O.get_thread_slice(tidx); + Tensor tOsO = + gmem_thr_copy_O.partition_S(sO); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tOgO = gmem_thr_copy_O.partition_D(gO); + + __syncthreads(); + + Tensor tOrO = make_tensor(shape(tOgO)); + cute::copy(gmem_tiled_copy_O, tOsO, tOrO); + + Tensor caccO = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor taccOcO = thr_mma.partition_C(caccO); // (MMA,MMA_M,MMA_K) + static_assert(decltype(size<0>(taccOcO))::value == 4); + // Convert to ((2, 2), MMA_M, MMA_K) then take only the row indices. 
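+  // Ownership sketch (illustrative for an m16n8 C-fragment; the exact mapping
+  // comes from the TiledMma): lane l of a warp holds rows (l / 4) and
+  // (l / 4) + 8 of a 16-row tile, and columns 2 * (l % 4) and 2 * (l % 4) + 1.
+  // Keeping coordinate (0, _) of the ((2, 2), ...) split leaves one entry per
+  // owned row, and the get<1>(...) == 0 check below means only the threads
+  // that own column 0 write gLSE, so each row's LSE is written exactly once.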
+ Tensor taccOcO_row = + logical_divide(taccOcO, Shape<_2>{})(make_coord(0, _), _, 0); + CUTE_STATIC_ASSERT_V(size(lse) == size(taccOcO_row)); // MMA_M + if (get<1>(taccOcO_row(0)) == 0) { +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccOcO_row(mi)); + if (row < binfo.actual_seqlen_q - m_block * kBlockM) { + gLSE(row) = lse(mi); + } + } + } + + // Construct identity layout for sO + Tensor cO = make_identity_tensor( + make_shape(size<0>(sO), size<1>(sO))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + // Repeat the partitioning with identity layouts + Tensor tOcO = + gmem_thr_copy_O.partition_D(cO); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tOpO = make_tensor(make_shape(size<2>(tOgO))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tOpO); ++k) { + tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d; + } + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_O, tOrO, tOgO, tOcO, tOpO, + binfo.actual_seqlen_q - m_block * kBlockM); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void +compute_attn_1rowblock_splitkv(const Params ¶ms, const int bidb, + const int bidh, const int m_block, + const int n_split_idx, const int num_n_splits) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + constexpr int kNWarps = Kernel_traits::kNWarps; + + using GmemTiledCopyO = + std::conditional_t; + using ElementO = std::conditional_t; + + const BlockInfo binfo(params, bidb); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { + // printf("Is_even_MN = %d, is_cumulativ = %d, seqlen_k_cache = %d, + // actual_seqlen_k = %d\n", Is_even_MN, params.is_seqlens_k_cumulative, + // binfo.seqlen_k_cache, binfo.actual_seqlen_k); } if (threadIdx.x == 0 && + // blockIdx.y == 1 && blockIdx.z == 0) { printf("params.knew_ptr = %p, + // seqlen_k_cache + seqlen_knew = %d\n", params.knew_ptr, binfo.seqlen_k_cache + // + (params.knew_ptr == nullptr ? 0 : params.seqlen_knew)); } + if (m_block * kBlockM >= binfo.actual_seqlen_q) + return; + + const int n_blocks_per_split = + ((params.seqlen_k + kBlockN - 1) / kBlockN + num_n_splits - 1) / + num_n_splits; + const int n_block_min = + !Is_local ? n_split_idx * n_blocks_per_split + : std::max(n_split_idx * n_blocks_per_split, + (m_block * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q - params.window_size_left) / + kBlockN); + int n_block_max = std::min(cute::ceil_div(binfo.actual_seqlen_k, kBlockN), + (n_split_idx + 1) * n_blocks_per_split); + if (Is_causal || Is_local) { + n_block_max = std::min( + n_block_max, + cute::ceil_div((m_block + 1) * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q + params.window_size_right, + kBlockN)); + } + if (n_block_min >= + n_block_max) { // This also covers the case where n_block_max <= 0 + // We exit early and write 0 to gOaccum and -inf to gLSEaccum. + // Otherwise we might read OOB elements from gK and gV, + // or get wrong results when we combine gOaccum from different blocks. 
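+    // The -INFINITY sentinel makes an empty split a no-op when splits are
+    // combined: the reduction later computes lse = log(sum_i exp(lse_i)) and
+    // O = sum_i exp(lse_i - lse) * O_i, so a split with lse_i = -inf
+    // contributes weight exp(-inf) = 0 regardless of its (zeroed) gOaccum
+    // tile. (Sketch of what combine_attn_seqk_parallel does further below.)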
+ const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_oaccum = + (((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + + m_block * kBlockM) * + params.d_rounded; + const index_t row_offset_lseaccum = + ((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + + m_block * kBlockM; + Tensor gOaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(Split ? params.oaccum_ptr + : params.o_ptr) + + (Split ? row_offset_oaccum : row_offset_o)), + Shape, Int>{}, + make_stride(Split ? kHeadDim : params.o_row_stride, _1{})); + Tensor gLSEaccum = + make_tensor(make_gmem_ptr(reinterpret_cast( + Split ? params.softmax_lseaccum_ptr + : params.softmax_lse_ptr) + + row_offset_lseaccum), + Shape>{}, Stride<_1>{}); + + GmemTiledCopyO gmem_tiled_copy_Oaccum; + auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx); + Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_D(gOaccum); + Tensor tOrOaccum = make_tensor(shape(tOgOaccum)); + clear(tOrOaccum); + // Construct identity layout for sO + Tensor cO = make_identity_tensor(make_shape( + size<0>(gOaccum), size<1>(gOaccum))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + // Repeat the partitioning with identity layouts + Tensor tOcO = gmem_thr_copy_Oaccum.partition_D(cO); + Tensor tOpO = make_tensor(make_shape(size<2>(tOgOaccum))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tOpO); ++k) { + tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d; + } + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy( + gmem_tiled_copy_Oaccum, tOrOaccum, tOgOaccum, tOcO, tOpO, + binfo.actual_seqlen_q - m_block * kBlockM); +#pragma unroll + for (int m = 0; m < size<1>(tOgOaccum); ++m) { + const int row = get<0>(tOcO(0, m, 0)); + if (row < binfo.actual_seqlen_q - m_block * kBlockM && + get<1>(tOcO(0, m, 0)) == 0) { + gLSEaccum(row) = Split ? -INFINITY : INFINITY; + } + } + return; + } + + // We iterate over the blocks in reverse order. This is because the last block + // is the only one that needs masking when we read K and V from global memory. + // Moreover, iterating in reverse might save us 1 register (we just need + // n_block instead of both n_block and n_block_max). + + const index_t row_offset_q = + binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; + // We move K and V to the last block. + const int bidb_cache = + params.cache_batch_idx == nullptr ? 
bidb : params.cache_batch_idx[bidb]; + const index_t row_offset_k = + binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb_cache) + + (n_block_max - 1) * kBlockN * params.k_row_stride + + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = + binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb_cache) + + (n_block_max - 1) * kBlockN * params.v_row_stride + + (bidh / params.h_h_k_ratio) * params.v_head_stride; + + Tensor gQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { printf("k_ptr + // = %p, row_offset_k = %d, gK_ptr = %p\n", params.k_ptr, row_offset_k, + // gK.data()); } + Tensor gV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQ{}); + Tensor sK = + make_tensor(sQ.data() + size(sQ), typename Kernel_traits::SmemLayoutKV{}); + Tensor sV = + make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); + Tensor sVt = + make_tensor(sV.data(), typename Kernel_traits::SmemLayoutVtransposed{}); + Tensor sVtNoSwizzle = make_tensor( + sV.data(), typename Kernel_traits::SmemLayoutVtransposedNoSwizzle{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + + typename Kernel_traits::TiledMma tiled_mma; + auto thr_mma = tiled_mma.get_thread_slice(tidx); + Tensor tSrQ = thr_mma.partition_fragment_A(sQ); // (MMA,MMA_M,MMA_K) + Tensor tSrK = thr_mma.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tOrVt = + thr_mma.partition_fragment_B(sVtNoSwizzle); // (MMA, MMA_K,MMA_N) + + Tensor acc_o = partition_fragment_C( + tiled_mma, Shape, Int>{}); // MMA, MMA_M, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_Q = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); + auto smem_thr_copy_Q = smem_tiled_copy_Q.get_thread_slice(tidx); + Tensor tSsQ = smem_thr_copy_Q.partition_S(sQ); + + auto smem_tiled_copy_K = + make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); + auto smem_thr_copy_K = smem_tiled_copy_K.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_K.partition_S(sK); + + auto smem_tiled_copy_V = make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma); + auto smem_thr_copy_V = smem_tiled_copy_V.get_thread_slice(tidx); + Tensor tOsVt = smem_thr_copy_V.partition_S(sVt); + + // TODO: this might need to change if we change the mma instruction in SM70 + Tensor scores_max = + make_tensor(Shape(acc_o)>>{}); + Tensor scores_sum = make_fragment_like(scores_max); + + // + // PREDICATES + // + + // // Allocate predicate tensors for m and n + // Tensor tQpQ = make_tensor(make_shape(size<1>(tQsQ), size<2>(tQsQ)), + // 
Stride<_1,_0>{}); Tensor tKVpKV = + // make_tensor(make_shape(size<1>(tKsK), size<2>(tKsK)), + // Stride<_1,_0>{}); + + // Construct identity layout for sQ and sK + Tensor cQ = make_identity_tensor( + make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = make_identity_tensor( + make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + + // Repeat the partitioning with identity layouts + Tensor tQcQ = gmem_thr_copy_QKV.partition_S( + cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tKVcKV = gmem_thr_copy_QKV.partition_S( + cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { + tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; + } +#pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { + tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; + } + } + + // Prologue + + // Copy from Knew to K, optionally apply rotary embedding. + typename Kernel_traits::GmemTiledCopyRotcossin gmem_tiled_copy_rotary; + auto gmem_thr_copy_rotary = gmem_tiled_copy_rotary.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopyRotcossinCont + gmem_tiled_copy_rotary_cont; + auto gmem_thr_copy_rotary_cont = + gmem_tiled_copy_rotary_cont.get_thread_slice(tidx); + if constexpr (Append_KV) { + // Even if we have MQA / GQA, all threadblocks responsible for the same KV + // head are writing to gmem. Technically it's a race condition, but they all + // write the same content anyway, and it's safe. We want to do this so that + // all threadblocks can proceed right after they finish writing the KV + // cache. 
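+    // E.g. with GQA at h_h_k_ratio = 8 (8 query heads per KV head), the 8
+    // threadblocks that share one KV head all append the same K/V tile here;
+    // the racing writes are byte-identical, so it doesn't matter which one
+    // lands last.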
+ const index_t row_offset_cossin = + ((n_block_max - 1) * kBlockN) * (params.rotary_dim / 2); + Tensor gCos = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_cos_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(params.rotary_dim / 2, _1{})); + Tensor gSin = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_sin_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(params.rotary_dim / 2, _1{})); + Tensor gCosCont = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_cos_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(params.rotary_dim / 2, _1{})); + Tensor gSinCont = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_sin_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(params.rotary_dim / 2, _1{})); + Tensor tRgCos = gmem_thr_copy_rotary.partition_S(gCos); + Tensor tRgSin = gmem_thr_copy_rotary.partition_S(gSin); + Tensor tRgCosCont = gmem_thr_copy_rotary_cont.partition_S(gCosCont); + Tensor tRgSinCont = gmem_thr_copy_rotary_cont.partition_S(gSinCont); + // if (cute::thread(0, 0)) { printf("rotary_cos_ptr = %p, gCos.data() = %p, + // tRgCos.data() = %p, rotary_dim = %d\n", params.rotary_cos_ptr, + // gCos.data(), tRgCos.data(), params.rotary_dim); } if (cute::thread(8, 0)) + // { print_tensor(gCos); } if (cute::thread(0, 0)) { print_tensor(tRgCos); } + + const index_t row_offset_knew = + binfo.k_offset(params.knew_batch_stride, params.knew_row_stride, bidb) + + ((n_block_max - 1) * kBlockN) * params.knew_row_stride + + (bidh / params.h_h_k_ratio) * params.knew_head_stride; + const index_t row_offset_vnew = + binfo.k_offset(params.vnew_batch_stride, params.vnew_row_stride, bidb) + + ((n_block_max - 1) * kBlockN) * params.vnew_row_stride + + (bidh / params.h_h_k_ratio) * params.vnew_head_stride; + // Subtract seqlen_k_cache * row stride so that conceptually gK and gKnew + // "line up". When we access them, e.g. if gK has 128 rows and gKnew has 64 + // rows, we access gK[:128] and gKNew[128:128 + 64]. This maps to accessing + // the first 64 rows of knew_ptr. 
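+    // In index terms: combined-K row r >= seqlen_k_cache maps to row
+    // (r - seqlen_k_cache) of knew_ptr; baking the
+    // -seqlen_k_cache * knew_row_stride shift into the gKnew base below lets
+    // the copy loop drive gK and gKnew with a single row index.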
+ Tensor gKnew = make_tensor( + make_gmem_ptr(reinterpret_cast(params.knew_ptr) + + row_offset_knew - + binfo.seqlen_k_cache * params.knew_row_stride), + Shape, Int>{}, + make_stride(params.knew_row_stride, _1{})); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { + // printf("knew_ptr = %p, row_offset_knew = %d, gKnew_ptr = %p\n", + // params.knew_ptr, row_offset_knew, gKnew.data()); } + Tensor gVnew = make_tensor( + make_gmem_ptr(reinterpret_cast(params.vnew_ptr) + + row_offset_vnew - + binfo.seqlen_k_cache * params.vnew_row_stride), + Shape, Int>{}, + make_stride(params.vnew_row_stride, _1{})); + Tensor tKgKnew = + gmem_thr_copy_QKV.partition_S(gKnew); // (KCPY, KCPY_N, KCPY_K) + Tensor tVgVnew = + gmem_thr_copy_QKV.partition_S(gVnew); // (VCPY, VCPY_N, VCPY_K) + + const int n_block_copy_min = + std::max(n_block_min, binfo.seqlen_k_cache / kBlockN); + for (int n_block = n_block_max - 1; n_block >= n_block_copy_min; + n_block--) { + flash::copy_w_min_idx( + tVgVnew, tVgV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN, + binfo.seqlen_k_cache - n_block * kBlockN); + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + tVgVnew.data() = + tVgVnew.data() + (-int(kBlockN * params.vnew_row_stride)); + if (params.rotary_dim == 0) { + flash::copy_w_min_idx( + tKgKnew, tKgK, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN, + binfo.seqlen_k_cache - n_block * kBlockN); + } else { + if (params.is_rotary_interleaved) { + // Don't clear OOB_K because we're writing to global memory + flash::copy_rotary_interleaved( + tKgKnew, tKgK, tRgCos, tRgSin, tKVcKV, + binfo.actual_seqlen_k - n_block * kBlockN, + binfo.seqlen_k_cache - n_block * kBlockN, params.d, + params.rotary_dim); + tRgCos.data() = + tRgCos.data() + (-int(kBlockN * params.rotary_dim / 2)); + tRgSin.data() = + tRgSin.data() + (-int(kBlockN * params.rotary_dim / 2)); + } else { + // Don't clear OOB_K because we're writing to global memory + flash::copy_rotary_contiguous( + tKgKnew, tKgK, tRgCosCont, tRgSinCont, tKVcKV, + binfo.actual_seqlen_k - n_block * kBlockN, + binfo.seqlen_k_cache - n_block * kBlockN, params.d, + params.rotary_dim); + tRgCosCont.data() = + tRgCosCont.data() + (-int(kBlockN * params.rotary_dim / 2)); + tRgSinCont.data() = + tRgSinCont.data() + (-int(kBlockN * params.rotary_dim / 2)); + } + } + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + tKgKnew.data() = + tKgKnew.data() + (-int(kBlockN * params.knew_row_stride)); + } + // Need this before we can read in K again, so that we'll see the updated K + // values. + __syncthreads(); + if (n_block_max > n_block_copy_min) { + tKgK.data() = tKgK.data() + (n_block_max - n_block_copy_min) * kBlockN * + params.k_row_stride; + tVgV.data() = tVgV.data() + (n_block_max - n_block_copy_min) * kBlockN * + params.v_row_stride; + } + } + + // Read Q from gmem to smem, optionally apply rotary embedding. + Tensor tQrQ = make_fragment_like(tQgQ); + if (!Append_KV || params.rotary_dim == 0) { + // We don't need to clear the sQ smem tiles since we'll only write out the + // valid outputs + flash::copy( + gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + } else { + const index_t row_offset_cossin = + (binfo.seqlen_k_cache + + (Is_causal || Is_local ? m_block * kBlockM : 0)) * + (params.rotary_dim / 2); + // If not causal, all the queries get the same the cos/sin, taken at + // location seqlen_k_cache. We do this by setting the row stride of gCos / + // gSin to 0. 
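+    // E.g. single-token decoding: the one new query sits at absolute position
+    // seqlen_k_cache, so every (padded) row of the M-block should read the
+    // cos/sin for that position, and a row stride of 0 broadcasts it. In the
+    // causal/local case row i instead reads position
+    // seqlen_k_cache + m_block * kBlockM + i via the usual rotary_dim / 2
+    // stride.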
+ Tensor gCos = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_cos_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{})); + Tensor gSin = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_sin_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{})); + Tensor gCosCont = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_cos_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{})); + Tensor gSinCont = make_tensor( + make_gmem_ptr(reinterpret_cast(params.rotary_sin_ptr) + + row_offset_cossin), + Shape, Int>{}, + make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{})); + Tensor tRgCos = gmem_thr_copy_rotary.partition_S(gCos); + Tensor tRgSin = gmem_thr_copy_rotary.partition_S(gSin); + Tensor tRgCosCont = gmem_thr_copy_rotary_cont.partition_S(gCosCont); + Tensor tRgSinCont = gmem_thr_copy_rotary_cont.partition_S(gSinCont); + if (params.is_rotary_interleaved) { + flash::copy_rotary_interleaved( + tQgQ, tQsQ, tRgCos, tRgSin, tQcQ, + binfo.actual_seqlen_q - m_block * kBlockM, 0, params.d, + params.rotary_dim); + } else { + flash::copy_rotary_contiguous( + tQgQ, tQsQ, tRgCosCont, tRgSinCont, tQcQ, + binfo.actual_seqlen_q - m_block * kBlockM, 0, params.d, + params.rotary_dim); + } + } + + int n_block = n_block_max - 1; + // We don't need to clear the sK smem tiles since we'll mask out the scores + // anyway. + flash::copy(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, + tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + cute::cp_async_fence(); + + // flash::cp_async_wait<0>(); + // __syncthreads(); + // if (tidx == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(tKsK); } + // __syncthreads(); + + clear(acc_o); + + float alibi_slope = + !Has_alibi ? 0.0f + : reinterpret_cast(params.alibi_slopes_ptr) + [bidb * params.alibi_slopes_batch_stride + bidh] / + params.scale_softmax; + + // For performance reason, we separate out two kinds of iterations: + // those that need masking on S, and those that don't. + // We need masking on S for the very last block when K and V has length not + // multiple of kBlockN. We also need masking on S if it's causal, for the last + // ceil_div(kBlockM, kBlockN) blocks. We will have at least 1 "masking" + // iteration. + + // If not even_N, then seqlen_k might end in the middle of a block. In that + // case we need to mask 2 blocks (e.g. when kBlockM == kBlockN), not just 1. + constexpr int n_masking_steps = + (!Is_causal && !Is_local) + ? 1 + : ((Is_even_MN && Is_causal) ? 
cute::ceil_div(kBlockM, kBlockN) + : cute::ceil_div(kBlockM, kBlockN) + 1); +#pragma unroll + for (int masking_step = 0; masking_step < n_masking_steps; + ++masking_step, --n_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma, + Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); + + // Advance gV + if (masking_step > 0) { + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tVgV, + tVsV, tKVcKV, tKVpKV); + } else { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + } + cute::cp_async_fence(); + + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, + smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); + // if (cute::thread0()) { print(acc_s); } + + // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + + if (Has_alibi) { + flash::apply_alibi( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, alibi_slope); + } + + // if (cute::thread0()) { print(scores); } + // We don't put the masking before the matmul S = Q K^T because we don't + // clear sK for rows outside actual_seqlen_k. So those rows could have Inf / + // NaN, and the matmul can produce Inf / NaN. + if (!Is_causal && !Is_local) { + if (!Is_even_MN) { + flash::apply_mask(scores, binfo.actual_seqlen_k - n_block * kBlockN); + } + } else { + flash::apply_mask_local( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, params.window_size_left, + params.window_size_right); + } + + flash::cp_async_wait<0>(); + __syncthreads(); + // if (tidx == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(tVsV); } + // __syncthreads(); + + if (n_block > n_block_min) { + // Advance gK + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, + tKsK, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the + // synchronization isn't right and we get race conditions. + cute::cp_async_fence(); + } + + // We have key_padding_mask so we'll need to Check_inf + masking_step == 0 + ? softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2) + : softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2); + // if (cute::thread0()) { print(scores_max); print(scores_sum); + // print(scores); } + + // Convert scores from fp32 to fp16/bf16 + Tensor rP = flash::convert_type(scores); + // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using + // m16n8k8. 
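+    // Why Check_inf is needed above: a fully-masked row keeps
+    // scores_max == -INFINITY, and without clamping it to 0.0f the rescale
+    // would evaluate exp2f((-inf - (-inf)) * scale) = NaN and poison acc_o;
+    // with the clamp the row simply keeps sum == 0 and is patched up in the
+    // epilogue. Also note there is no dropout / Return_softmax handling in
+    // this split-KV path, so rP feeds straight into the second GEMM.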
+ Tensor tOrP = make_tensor( + rP.data(), flash::convert_layout_rowcol_Aregs( + rP.layout())); + + flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, + smem_tiled_copy_V, smem_thr_copy_V); + // if (cute::thread0()) { print(scores); } + + // This check is at the end of the loop since we always have at least 1 + // iteration + if (n_masking_steps > 1 && n_block <= n_block_min) { + --n_block; + break; + } + } + + // These are the iterations where we don't need masking on S + for (; n_block >= n_block_min; --n_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma, + Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); + // Advance gV + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tVgV, tVsV, + tKVcKV, tKVpKV); + cute::cp_async_fence(); + + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, + smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); + + flash::cp_async_wait<0>(); + __syncthreads(); + if (n_block > n_block_min) { + // Advance gK + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, + tKsK, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the + // synchronization isn't right and we get race conditions. + cute::cp_async_fence(); + } + + // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + + if (Has_alibi) { + flash::apply_alibi( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, alibi_slope); + } + + if (Is_local && n_block * kBlockN < + (m_block + 1) * kBlockM + binfo.actual_seqlen_k - + binfo.actual_seqlen_q + params.window_size_right) { + flash::apply_mask_local( + scores, n_block * kBlockN, binfo.actual_seqlen_k, + m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, kNWarps * 16, params.window_size_left, + params.window_size_right); + } + softmax_rescale_o( + scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2); + + Tensor rP = flash::convert_type(scores); + // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using + // m16n8k8. + Tensor tOrP = make_tensor( + rP.data(), flash::convert_layout_rowcol_Aregs( + rP.layout())); + + flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, + smem_tiled_copy_V, smem_thr_copy_V); + } + + // Epilogue + + // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, + // MMA_K)) + Tensor acc_o_rowcol = make_tensor( + acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout())); + // if (cute::thread0()) { print(acc_o_rowcol); } + Tensor lse = make_fragment_like(scores_sum); +#pragma unroll + for (int mi = 0; mi < size<0>(acc_o_rowcol); ++mi) { + float sum = scores_sum(mi); + float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum; + lse(mi) = (sum == 0.f || sum != sum) + ? (Split ? 
-INFINITY : INFINITY) + : scores_max(mi) * params.scale_softmax + __logf(sum); + float scale = inv_sum; +#pragma unroll + for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { + acc_o_rowcol(mi, ni) *= scale; + } + } + // if (cute::thread0()) { print(lse); } + // if (cute::thread0()) { print(acc_o_rowcol); } + + Tensor sOaccum = + make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutO{}); // (SMEM_M,SMEM_N) + // Partition sO to match the accumulator partitioning + using SmemTiledCopyO = + std::conditional_t; + auto smem_tiled_copy_Oaccum = make_tiled_copy_C(SmemTiledCopyO{}, tiled_mma); + auto smem_thr_copy_Oaccum = smem_tiled_copy_Oaccum.get_thread_slice(tidx); + Tensor rO = flash::convert_type(acc_o); + Tensor taccOrOaccum = + smem_thr_copy_Oaccum.retile_S(rO); // ((Atom,AtomNum), MMA_M, MMA_N) + Tensor taccOsOaccum = smem_thr_copy_Oaccum.partition_D( + sOaccum); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // sOaccum is larger than sQ, so we need to syncthreads here + // TODO: allocate enough smem for sOaccum + if constexpr (Split) { + __syncthreads(); + } + + cute::copy(smem_tiled_copy_Oaccum, taccOrOaccum, taccOsOaccum); + + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_oaccum = + (((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + + m_block * kBlockM) * + params.d_rounded; + const index_t row_offset_lseaccum = + ((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + + m_block * kBlockM; + + Tensor gOaccum = + make_tensor(make_gmem_ptr(reinterpret_cast( + Split ? params.oaccum_ptr : params.o_ptr) + + (Split ? row_offset_oaccum : row_offset_o)), + Shape, Int>{}, + make_stride(Split ? kHeadDim : params.o_row_stride, _1{})); + Tensor gLSEaccum = + make_tensor(make_gmem_ptr(reinterpret_cast( + Split ? params.softmax_lseaccum_ptr + : params.softmax_lse_ptr) + + row_offset_lseaccum), + Shape>{}, Stride<_1>{}); + // if (tidx == 0) { printf("row_offset_o = %d, bidh = %d, gOaccum = %p\n", + // row_offset_o, bidh, gOaccum.data()); } + + GmemTiledCopyO gmem_tiled_copy_Oaccum; + auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx); + Tensor tOsOaccum = gmem_thr_copy_Oaccum.partition_S( + sOaccum); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_D(gOaccum); + + __syncthreads(); + + Tensor tOrOaccum = make_tensor(shape(tOgOaccum)); + cute::copy(gmem_tiled_copy_Oaccum, tOsOaccum, tOrOaccum); + + Tensor caccO = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor taccOcO = thr_mma.partition_C(caccO); // (MMA,MMA_M,MMA_K) + static_assert(decltype(size<0>(taccOcO))::value == 4); + // Convert to ((2, 2), MMA_M, MMA_K) then take only the row indices. 
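+  // Layout note: row_offset_lseaccum above indexes gLSEaccum as
+  // (n_split_idx, bidb, bidh, row), so the same query row's per-split LSEs
+  // sit params.b * params.h * params.seqlen_q apart -- exactly the row
+  // stride combine_attn_seqk_parallel uses to gather them back together.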
+ Tensor taccOcO_row = + logical_divide(taccOcO, Shape<_2>{})(make_coord(0, _), _, 0); + CUTE_STATIC_ASSERT_V(size(lse) == size(taccOcO_row)); // MMA_M + if (get<1>(taccOcO_row(0)) == 0) { +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccOcO_row(mi)); + if (row < binfo.actual_seqlen_q - m_block * kBlockM) { + gLSEaccum(row) = lse(mi); + } + } + } + + // Construct identity layout for sO + Tensor cO = make_identity_tensor(make_shape( + size<0>(sOaccum), size<1>(sOaccum))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + // Repeat the partitioning with identity layouts + Tensor tOcO = gmem_thr_copy_Oaccum.partition_D( + cO); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tOpO = make_tensor(make_shape(size<2>(tOgOaccum))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tOpO); ++k) { + tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d; + } + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_Oaccum, tOrOaccum, + tOgOaccum, tOcO, tOpO, + binfo.actual_seqlen_q - m_block * kBlockM); + // __syncthreads(); + // if (cute::thread0()) { print(tOgOaccum); } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_attn(const Params ¶ms) { + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + + // We want the fwd and bwd to generate the same dropout pattern (RNG), without + // restricting them to have the same number of threads or have to traverse the + // attention matrix in the same order. In the Philox RNG, we use the offset to + // store the batch, head, and the lane id (within a warp). We use the + // subsequence to store the location of the 16 x 32 blocks within the + // attention matrix. This way, as long as we have the batch, head, and the + // location of the 16 x 32 block within the attention matrix, we can generate + // the exact same dropout pattern. + + flash::compute_attn_1rowblock(params, bidb, bidh, m_block); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void compute_attn_splitkv(const Params ¶ms) { + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = Split ? blockIdx.z / params.h : blockIdx.y; + // The block index for the head. + const int bidh = Split ? blockIdx.z - bidb * params.h : blockIdx.z; + const int n_split_idx = Split ? blockIdx.y : 0; + const int num_n_splits = Split ? 
gridDim.y : 1; + flash::compute_attn_1rowblock_splitkv(params, bidb, bidh, m_block, + n_split_idx, num_n_splits); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void combine_attn_seqk_parallel(const Params &params) { + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + constexpr int kMaxSplits = 1 << Log_max_splits; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + constexpr int kNThreads = Kernel_traits::kNThreads; + + static_assert(kMaxSplits <= 128, "kMaxSplits must be <= 128"); + static_assert(kBlockM == 4 || kBlockM == 8 || kBlockM == 16 || kBlockM == 32, + "kBlockM must be 4, 8, 16 or 32"); + static_assert(kNThreads == 128, "We assume that each block has 128 threads"); + + // Shared memory. + // kBlockM + 1 instead of kBlockM to reduce bank conflicts. + __shared__ ElementAccum sLSE[kMaxSplits][kBlockM + 1]; + + // The thread and block index. + const int tidx = threadIdx.x; + const int bidx = blockIdx.x; + + const index_t row_offset_lse = bidx * kBlockM; + Tensor gLSEaccum = + make_tensor(make_gmem_ptr(reinterpret_cast( + params.softmax_lseaccum_ptr) + + row_offset_lse), + Shape, Int>{}, + make_stride(params.b * params.h * params.seqlen_q, _1{})); + Tensor gLSE = make_tensor( + make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + + row_offset_lse), + Shape>{}, Stride<_1>{}); + constexpr int kNLsePerThread = + (kMaxSplits * kBlockM + kNThreads - 1) / kNThreads; + + // Read the LSE values from gmem and store them in shared memory, then + // transpose them. + constexpr int kRowsPerLoadLSE = kNThreads / kBlockM; +#pragma unroll + for (int l = 0; l < kNLsePerThread; ++l) { + const int row = l * kRowsPerLoadLSE + tidx / kBlockM; + const int col = tidx % kBlockM; + ElementAccum lse = + (row < params.num_splits && + col < params.b * params.h * params.seqlen_q - bidx * kBlockM) + ? gLSEaccum(row, col) + : -INFINITY; + if (row < kMaxSplits) { + sLSE[row][col] = lse; + } + // if (bidx == 0 && tidx < 32) { printf("tidx = %d, row = %d, col = %d, lse + // = %f\n", tidx, row, col, lse); } + } + // if (bidx == 1 && tidx < 32) { printf("tidx = %d, row_offset_lse = %d, lse = + // %f\n", tidx, row_offset_lse, lse_accum(0)); } + __syncthreads(); + Tensor lse_accum = make_tensor(Shape>{}); + constexpr int kRowsPerLoadTranspose = std::min(kRowsPerLoadLSE, kMaxSplits); + // To make sure that kMaxSplits is within 1 warp: we decide how many elements + // within kMaxSplits each thread should hold. If kMaxSplits = 16, then each + // thread holds 2 elements (128 threads, kBlockM rows, so each time we load we + // can load 128 / kBlockM rows). constexpr int kThreadsPerSplit = kMaxSplits / + // kRowsPerLoadTranspose; static_assert(kThreadsPerSplit <= 32); + static_assert(kRowsPerLoadTranspose <= 32); + static_assert(kNLsePerThread * kRowsPerLoadTranspose <= kMaxSplits); +#pragma unroll + for (int l = 0; l < kNLsePerThread; ++l) { + const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose; + const int col = tidx / kRowsPerLoadTranspose; + lse_accum(l) = + (row < kMaxSplits && col < kBlockM) ? sLSE[row][col] : -INFINITY; + // if (bidx == 0 && tidx < 32) { printf("tidx = %d, row = %d, col = %d, lse + // = %f\n", tidx, row, col, lse_accum(l)); } + } + + // Compute the logsumexp of the LSE along the split dimension.
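+ // In scalar form, the combine below computes, for one output row with
+ // per-split statistics lse[s] and per-split partial outputs o[s][d]
+ // (a minimal sketch assuming plain float arrays rather than cute tensors):
+ //
+ //   float m = -INFINITY;
+ //   for (int s = 0; s < num_splits; ++s) m = fmaxf(m, lse[s]);
+ //   float z = 0.f;
+ //   for (int s = 0; s < num_splits; ++s) z += expf(lse[s] - m);
+ //   float lse_total = logf(z) + m;                  // written to gLSE
+ //   for (int d = 0; d < headdim; ++d) {
+ //     float acc = 0.f;
+ //     for (int s = 0; s < num_splits; ++s)
+ //       acc += expf(lse[s] - lse_total) * o[s][d];  // scales cached in sLSE
+ //     out[d] = acc;
+ //   }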
+ ElementAccum lse_max = lse_accum(0); +#pragma unroll + for (int l = 1; l < kNLsePerThread; ++l) { + lse_max = max(lse_max, lse_accum(l)); + } + MaxOp max_op; + lse_max = Allreduce::run(lse_max, max_op); + lse_max = + lse_max == -INFINITY ? 0.0f : lse_max; // In case all local LSEs are -inf + float lse_sum = expf(lse_accum(0) - lse_max); +#pragma unroll + for (int l = 1; l < kNLsePerThread; ++l) { + lse_sum += expf(lse_accum(l) - lse_max); + } + SumOp sum_op; + lse_sum = Allreduce::run(lse_sum, sum_op); + // For the case where all local lse == -INFINITY, we want to set lse_logsum to + // INFINITY. Otherwise lse_logsum is log(0.0) = -INFINITY and we get NaN when + // we do lse_accum(l) - lse_logsum. + ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum) + ? INFINITY + : logf(lse_sum) + lse_max; + // if (bidx == 0 && tidx < 32) { printf("tidx = %d, lse = %f, lse_max = %f, + // lse_logsum = %f\n", tidx, lse_accum(0), lse_max, lse_logsum); } + if (tidx % kRowsPerLoadTranspose == 0 && + tidx / kRowsPerLoadTranspose < kBlockM) { + gLSE(tidx / kRowsPerLoadTranspose) = lse_logsum; + } +// Store the scales exp(lse - lse_logsum) in shared memory. +#pragma unroll + for (int l = 0; l < kNLsePerThread; ++l) { + const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose; + const int col = tidx / kRowsPerLoadTranspose; + if (row < params.num_splits && col < kBlockM) { + sLSE[row][col] = expf(lse_accum(l) - lse_logsum); + } + } + __syncthreads(); + + const index_t row_offset_oaccum = bidx * kBlockM * params.d_rounded; + Tensor gOaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.oaccum_ptr) + + row_offset_oaccum), + Shape, Int>{}, Stride, _1>{}); + constexpr int kBlockN = kNThreads / kBlockM; + using GmemLayoutAtomOaccum = + Layout, Int>, Stride, _1>>; + using GmemTiledCopyOaccum = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomOaccum{}, + Layout>{})); // Val layout, 4 vals per store + GmemTiledCopyOaccum gmem_tiled_copy_Oaccum; + auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx); + Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_S(gOaccum); + Tensor tOrO = make_tensor(shape(tOgOaccum)); + Tensor tOrOaccum = make_tensor(shape(tOgOaccum)); + clear(tOrO); + + // Predicates + Tensor cOaccum = make_identity_tensor(Shape, Int>{}); + // Repeat the partitioning with identity layouts + Tensor tOcOaccum = gmem_thr_copy_Oaccum.partition_S(cOaccum); + Tensor tOpOaccum = make_tensor(make_shape(size<2>(tOgOaccum))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tOpOaccum); ++k) { + tOpOaccum(k) = get<1>(tOcOaccum(0, 0, k)) < params.d; + } + } + // Load Oaccum in then scale and accumulate to O + for (int split = 0; split < params.num_splits; ++split) { + flash::copy( + gmem_tiled_copy_Oaccum, tOgOaccum, tOrOaccum, tOcOaccum, tOpOaccum, + params.b * params.h * params.seqlen_q - bidx * kBlockM); +#pragma unroll + for (int m = 0; m < size<1>(tOrOaccum); ++m) { + int row = get<0>(tOcOaccum(0, m, 0)); + ElementAccum lse_scale = sLSE[split][row]; +#pragma unroll + for (int k = 0; k < size<2>(tOrOaccum); ++k) { +#pragma unroll + for (int i = 0; i < size<0>(tOrOaccum); ++i) { + tOrO(i, m, k) += lse_scale * tOrOaccum(i, m, k); + } + } + // if (cute::thread0()) { printf("lse_scale = %f, %f\n", sLSE[split][0], + // sLSE[split][1]); print(tOrOaccum); } + } + tOgOaccum.data() = tOgOaccum.data() + + params.b * params.h * params.seqlen_q * params.d_rounded; + } + // if (cute::thread0()) { print_tensor(tOrO); } + + Tensor rO = 
flash::convert_type(tOrO); +// Write to gO +#pragma unroll + for (int m = 0; m < size<1>(rO); ++m) { + const int idx = bidx * kBlockM + get<0>(tOcOaccum(0, m, 0)); + if (idx < params.b * params.h * params.seqlen_q) { + const int batch_idx = idx / (params.h * params.seqlen_q); + const int head_idx = + (idx - batch_idx * (params.h * params.seqlen_q)) / params.seqlen_q; + // The index to the rows of Q + const int row = idx - batch_idx * (params.h * params.seqlen_q) - + head_idx * params.seqlen_q; + auto o_ptr = reinterpret_cast(params.o_ptr) + + batch_idx * params.o_batch_stride + + head_idx * params.o_head_stride + row * params.o_row_stride; +#pragma unroll + for (int k = 0; k < size<2>(rO); ++k) { + if (Is_even_K || tOpOaccum(k)) { + const int col = get<1>(tOcOaccum(0, m, k)); + Tensor gO = make_tensor(make_gmem_ptr(o_ptr + col), + Shape(rO))::value>>{}, + Stride<_1>{}); + // TODO: Should check if this is using vectorized store, but it seems + // pretty fast + copy(rO(_, m, k), gO); + // if (bidx == 0 && tidx == 0) { printf("tidx = %d, idx = %d, + // batch_idx = %d, head_idx = %d, row = %d, col = %d\n", tidx, idx, + // batch_idx, head_idx, row, col); print(rO(_, m, k)); print(gO); } + // reinterpret_cast(o_ptr)[col / 4] = + // recast(rO)(0, m, k); + } + } + } + } +} + +} // namespace flash diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h b/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h new file mode 100644 index 000000000..75ad04499 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h @@ -0,0 +1,356 @@ +/****************************************************************************** + * Copyright (c) 2023, Tri Dao. + ******************************************************************************/ + +#pragma once + +// #include + +#include "static_switch.h" +#include "flash.h" +#include "flash_fwd_kernel.h" + +template +__global__ void flash_fwd_kernel(Flash_fwd_params params) { + static_assert(!(Is_causal && Is_local)); // If Is_local is true, Is_causal should be false + flash::compute_attn(params); +} + +template +__global__ void flash_fwd_splitkv_kernel(Flash_fwd_params params) { + flash::compute_attn_splitkv(params); +} + +template +__global__ void flash_fwd_splitkv_combine_kernel(Flash_fwd_params params) { + static_assert(Log_max_splits >= 1); + flash::combine_attn_seqk_parallel(params); +} + +template +void run_flash_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr size_t smem_size = Kernel_traits::kSmemSize; + // printf("smem_size = %d\n", smem_size); + + // Work-around for gcc 7. It doesn't like nested BOOL_SWITCH. 
+ // https://github.com/kokkos/kokkos-kernels/issues/349 + // https://github.com/HazyResearch/flash-attention/issues/21 + + const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid(num_m_block, params.b, params.h); + const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + const bool return_softmax = params.p_ptr != nullptr; + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !Is_causal, Is_local, [&] { + BOOL_SWITCH(return_softmax, ReturnSoftmaxConst, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // Will only return softmax if dropout, to reduce compilation time. + // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. + // If return_softmax, set IsEvenMNConst to false to reduce number of templates + // If head dim > 128, set IsEvenMNConst to false to reduce number of templates + // If Is_local, set Is_causal to false + auto kernel = &flash_fwd_kernel; + // auto kernel = &flash_fwd_kernel; + // printf("IsEvenMNConst = %d, IsEvenKConst = %d, Is_local = %d, Is_causal = %d, ReturnSoftmaxConst = %d, Is_dropout = %d\n", int(IsEvenMNConst), int(IsEvenKConst), int(Is_local), int(Is_causal), int(ReturnSoftmaxConst), int(Is_dropout)); + // auto kernel = &flash_fwd_kernel; + if (smem_size >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); + } + // int ctas_per_sm; + // cudaError status_ = cudaOccupancyMaxActiveBlocksPerMultiprocessor( + // &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size); + // printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), ctas_per_sm); + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + }); + }); + }); + }); +} + +template +void run_flash_splitkv_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { + static_assert(!Kernel_traits::Is_Q_in_regs, "SplitKV implementation does not support Is_Q_in_regs"); + static_assert(!Kernel_traits::Share_Q_K_smem, "SplitKV implementation does not support Share_Q_K_smem"); + constexpr size_t smem_size = Kernel_traits::kSmemSize; + const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid(num_m_block, params.num_splits > 1 ? params.num_splits : params.b, params.num_splits > 1 ? params.b * params.h : params.h); + const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !Is_causal, Is_local, [&] { + BOOL_SWITCH(params.num_splits > 1, Split, [&] { + BOOL_SWITCH(params.knew_ptr != nullptr, Append_KV, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If Append_KV, then we must have seqlen_offsets, which means cu_seqlens_k != nullptr. 
+ // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. + // If Is_local, set Is_causal to false + auto kernel = &flash_fwd_splitkv_kernel; + // auto kernel = &flash_fwd_splitkv_kernel; + // auto kernel = &flash_fwd_splitkv_kernel; + if (smem_size >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + }); + }); + }); + }); + }); + }); + if (params.num_splits > 1) { + // We want kBlockM to be as small as possible for more parallelism. + // With 128 threads we can load 512 elements at a time, so if headdim is divisible by 128, kBlockM = 4. + // If headdim is divisible by 64, then we set kBlockM = 8, etc. + constexpr static int kBlockM = Kernel_traits::kHeadDim % 128 == 0 ? 4 : (Kernel_traits::kHeadDim % 64 == 0 ? 8 : 16); + dim3 grid_combine((params.b * params.h * params.seqlen_q + kBlockM - 1) / kBlockM); + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + if (params.num_splits <= 2) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 4) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 8) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 16) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 32) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 64) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } else if (params.num_splits <= 128) { + flash_fwd_splitkv_combine_kernel<<>>(params); + } + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + } +} + +template +void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int kBlockM = 64; // Fixed for all head dimensions + // TD [2023-08-28]: nvcc segfaults for headdim 96 with block size 64 x 256, + // and for headdim 192 with block size 64 x 128. + // Also for headdim 160 with block size 64 x 128 after the rotary addition. + constexpr static int kBlockN = Headdim <= 64 ? 256 : (Headdim <= 128 ? 
128 : 64); + run_flash_splitkv_fwd>(params, stream); +} + +template +void run_mha_fwd_hdim32(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 32; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + }); + }); +} + +template +void run_mha_fwd_hdim64(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 64; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr(!Is_dropout) { + // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower + // Using block size (64 x 256) is 27% slower for seqlen=2k + // Using block size (256 x 64) is 85% slower for seqlen=2k, because of register spilling + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + }); + }); +} + +template +void run_mha_fwd_hdim96(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 96; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), + if (is_sm8x) { + if constexpr(!Is_causal) { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // These two are always slower + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + }); + }); +} + +template +void run_mha_fwd_hdim128(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 128; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr(!Is_dropout) { + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), + // and 128 x 32 (48 KB smem) is the fastest for non-causal since we get 2 CTAs per SM. 
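+ // Arithmetic behind the 48 KB figure (a sketch, assuming fp16 elements):
+ //   sQ      = 128 (kBlockM) * 128 (d) * 2 B  = 32 KB
+ //   sK + sV = 2 * 32 (kBlockN) * 128 * 2 B   = 16 KB
+ //   total   = 48 KB, so two CTAs fit in the ~100 KB of smem per SM on sm86/89.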
+ if (is_sm8x) { + if constexpr(!Is_causal) { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // Using 8 warps (128 x 128 and 256 x 64) is 28% slower for seqlen=2k + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // 1st ones are good for H100, A100 + // 2nd one is good for A6000 bc we get slightly better occupancy + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + }); + }); +} + +template +void run_mha_fwd_hdim160(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 160; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For A100, H100, 128 x 32 is the fastest. + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), + // and 128 x 64 with 8 warps is the fastest for non-causal. + if (is_sm8x) { + if constexpr(!Is_causal) { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + }); + }); +} + +template +void run_mha_fwd_hdim192(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 192; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr(!Is_dropout) { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); + }); + }); +} + +template +void run_mha_fwd_hdim224(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 224; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64)) { // 112 KB + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // 
run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // We can't do 128 x 32 with 8 warps because with headdim 224, kBlockKSmem = 32. + // If we have N = 32, there are only 1024 elements to load at once, where each load + // is 8 elements. This means we can only use 128 threads and not 256 threads. + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + }); + }); +} + +template +void run_mha_fwd_hdim256(Flash_fwd_params ¶ms, cudaStream_t stream) { + constexpr static int Headdim = 256; + int device; + cudaGetDevice(&device); + int max_smem_per_sm, max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_sm, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device); + status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For A100, we want to run with 128 x 64 (128KB smem). + // For H100 we want to run with 64 x 64 (96KB smem) since then we can get 2 CTAs per SM. + if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64) && max_smem_per_sm < 4 * Headdim * (64 + 2 * 64)) { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } + // 64 KB + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // 96 KB + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + }); + }); +} diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_bf16_sm80.cu new file mode 100644 index 000000000..477c560a7 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_fp16_sm80.cu new file mode 100644 index 000000000..914cd23bb --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim128_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_bf16_sm80.cu new file mode 100644 index 000000000..d753d59d5 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. 
See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_fp16_sm80.cu new file mode 100644 index 000000000..552c25d02 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim160_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_bf16_sm80.cu new file mode 100644 index 000000000..e6b350a7c --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_fp16_sm80.cu new file mode 100644 index 000000000..b9c193501 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim192_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_bf16_sm80.cu new file mode 100644 index 000000000..b6bf081f2 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_fp16_sm80.cu new file mode 100644 index 000000000..0d09606fb --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim224_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. 
See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_bf16_sm80.cu new file mode 100644 index 000000000..06a9524ac --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_fp16_sm80.cu new file mode 100644 index 000000000..54fd3b87f --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim256_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_bf16_sm80.cu new file mode 100644 index 000000000..beff74ce8 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_fp16_sm80.cu new file mode 100644 index 000000000..d97c9eaa0 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim32_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_bf16_sm80.cu new file mode 100644 index 000000000..aed05fadc --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. 
See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_fp16_sm80.cu new file mode 100644 index 000000000..3b905f62c --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim64_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_bf16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_bf16_sm80.cu new file mode 100644 index 000000000..00a5972bf --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_bf16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_fp16_sm80.cu b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_fp16_sm80.cu new file mode 100644 index 000000000..95a76967c --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_split_hdim96_fp16_sm80.cu @@ -0,0 +1,7 @@ +// Copyright (c) 2023, Tri Dao. +// Splitting the different head dimensions to different files to speed up compilation. +// This file is auto-generated. See "generate_kernels.py" + +#include "flash_fwd_launch_template.h" + +template void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream); diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits.h b/external_libs/runtime/flash_attn/lib/kernel_traits.h new file mode 100644 index 000000000..f000ff24d --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/kernel_traits.h @@ -0,0 +1,397 @@ +/****************************************************************************** + * Copyright (c) 2023, Tri Dao. 
+ ******************************************************************************/ + +#pragma once + +#include "cute/algorithm/copy.hpp" + +#include "cutlass/cutlass.h" +#include "cutlass/layout/layout.h" +#include + +using namespace cute; + +template +struct Flash_kernel_traits { + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using Element = elem_type; + static constexpr bool Has_cp_async = true; +#else + using Element = cutlass::half_t; + static constexpr bool Has_cp_async = false; +#endif + + using ElementAccum = float; + using index_t = uint32_t; + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using MMA_Atom_Arch = std::conditional_t< + std::is_same_v, + MMA_Atom, + MMA_Atom + >; + using ValLayoutMNK = Layout>; +#else + using MMA_Atom_Arch = MMA_Atom; + using ValLayoutMNK = Layout>; +#endif + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; +#else + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; +#endif +}; + +// If Share_Q_K_smem is true, that forces Is_Q_in_regs to be true +template > +struct Flash_fwd_kernel_traits : public Base { + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; + static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + using TiledMma = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout,_1,_1>>, // 4x1x1 or 8x1x1 thread group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM + + using SmemLayoutAtomQ = decltype( + composition(Swizzle{}, + // This has to be kBlockKSmem, using kHeadDim gives wrong results for d=128 + Layout>, + Stride, _1>>{})); + using SmemLayoutQ = decltype(tile_to_shape( + SmemLayoutAtomQ{}, + Shape, Int>{})); + + using SmemLayoutKV = decltype(tile_to_shape( + SmemLayoutAtomQ{}, + Shape, Int>{})); + + // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 + using SmemLayoutAtomVtransposedNoSwizzle = Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomVtransposed = decltype( + composition(Swizzle{}, SmemLayoutAtomVtransposedNoSwizzle{})); + using SmemLayoutVtransposed = decltype(tile_to_shape( + SmemLayoutAtomVtransposed{}, + Shape, Int>{})); + // Maybe the VtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? 
+ using SmemLayoutVtransposedNoSwizzle = decltype(tile_to_shape( + SmemLayoutAtomVtransposedNoSwizzle{}, + Shape, Int>{})); + // using SmemLayoutVtransposedNoSwizzle = decltype(SmemLayoutVtransposed{}.layout_fn()); + + using SmemLayoutAtomO = decltype( + composition(Swizzle{}, + Layout, Int>, + Stride, _1>>{})); + using SmemLayoutO = decltype(tile_to_shape( + SmemLayoutAtomO{}, + Shape, Int>{})); + using SmemCopyAtomO = Copy_Atom; + using SmemCopyAtomOaccum = Copy_Atom; + + static constexpr int kSmemQCount = size(SmemLayoutQ{}); + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemSize = Share_Q_K_smem ? std::max(kSmemQSize, kSmemKVSize) : kSmemQSize + kSmemKVSize; + + static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because of bank conflicts. + // For example, for d=128, smem is split into 2 "pages", each page takes care of columns + // 0-63 and 64-127. If we have 16 threads per row for gmem read, when we write to smem, + // thread 0 - 7 will write to the first page and thread 8 - 15 will write to the second page, + // to the same banks. + static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading + // from the same address by the same threadblock. This is slightly faster. + using Gmem_copy_struct = std::conditional_t< + Has_cp_async, + SM80_CP_ASYNC_CACHEGLOBAL, + DefaultCopy + >; + using GmemTiledCopyQKV = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopyO = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); + using GmemLayoutAtomP = Layout, Int>, + Stride, _1>>; + + using GmemTiledCopyP = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store + + using GmemLayoutAtomOaccum = std::conditional_t< + kBlockKSmem == 32, + Layout, // Thread layout, 8 threads per row + Stride< _8, _1>>, + Layout, // Thread layout, 16 threads per row + Stride< _16, _1>> + >; + using GmemTiledCopyOaccum = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtomOaccum{}, + Layout>{})); // Val layout, 4 vals per store + using GmemLayoutAtomRotcossin = GmemLayoutAtom; + using GmemTiledCopyRotcossin = decltype( + make_tiled_copy(Copy_Atom, Element>{}, + GmemLayoutAtomRotcossin{}, + Layout>{})); // Val layout, 4 vals per load + using GmemTiledCopyRotcossinCont = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtomRotcossin{}, + Layout>{})); // Val layout, 8 vals per load +}; + +// Is_V_in_regs is an option to reduce smem usage, but will increase register pressure. +// No_double_buffer is another option to reduce smem usage, but will slow things down.
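+// Roughly, per the kSmemSize formula further down (a sketch of the tradeoff):
+//   default:       smem = QdO + KV + dS + max(P, dQ)
+//   Is_V_in_regs:  smem = QdO + max(KV, KV/2 + dS + max(P, dQ)),
+// i.e. V's half of the K/V buffer is reclaimed once V sits in registers, and
+// No_double_buffer drops the extra sQ stage (2 QdO tiles instead of 3).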
+template > +struct Flash_bwd_kernel_traits : public Base { + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Is_V_in_regs = Is_V_in_regs_; + static constexpr bool No_double_buffer = No_double_buffer_; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + static constexpr int AtomLayoutMSdP = AtomLayoutMSdP_; + static_assert(kNWarps % AtomLayoutMSdP == 0); + static_assert(kNWarps % AtomLayoutNdKV == 0); + static_assert(kNWarps % AtomLayoutMdQ == 0); + + using TiledMmaSdP = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout, Int, _1>>, + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM + + using TiledMmadKV = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout, Int, _1>>, + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM + + using TiledMmadQ = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout, Int, _1>>, // 2x4x1 or 4x2x1 thread group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM + + using SmemLayoutAtomQdO = decltype( + composition(Swizzle{}, + Layout>, + Stride, _1>>{})); + using SmemLayoutQdO = decltype(tile_to_shape( + SmemLayoutAtomQdO{}, + make_shape(Int{}, Int{}))); + + using SmemLayoutAtomKV = decltype( + composition(Swizzle{}, + Layout, Int>, + Stride, _1>>{})); + using SmemLayoutKV = decltype(tile_to_shape( + // SmemLayoutAtomQdO{}, + SmemLayoutAtomKV{}, + make_shape(Int{}, Int{}))); + + using SmemLayoutAtomKtransposedNoSwizzle = Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomKtransposed = decltype( + composition(Swizzle{}, SmemLayoutAtomKtransposedNoSwizzle{})); + using SmemLayoutKtransposed = decltype(tile_to_shape( + SmemLayoutAtomKtransposed{}, + make_shape(Int{}, Int{}))); + // Maybe the KtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? + using SmemLayoutKtransposedNoSwizzle = decltype(tile_to_shape( + SmemLayoutAtomKtransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutKtransposedNoSwizzle = decltype(SmemLayoutKtransposed{}.layout_fn()); + + // TODO: generalize to other values of kBlockN + // TODO: what should be the Swizzle here? 3 is faster than 1, and 1 is faster than 2 + // static constexpr int kPBlockN = kBlockN; + static_assert(kBlockN >= 64); + // TD [2023-03-19]: Idk why kPBlockN = 16 and kSwizzlePdS=3 is the fastest. + static constexpr int kPBlockN = 64; + static_assert(kPBlockN == 16 || kPBlockN == 32 || kPBlockN == 64); + // static constexpr int kSwizzlePdS = kPBlockN == 16 ? 1 : (kPBlockN == 32 ? 
2 : 3); + static constexpr int kSwizzlePdS = 3; + using SmemLayoutAtomPdS = decltype( + composition(Swizzle{}, + Layout, Int>, + Stride, _1>>{})); + using SmemLayoutPdS = decltype(tile_to_shape( + SmemLayoutAtomPdS{}, + make_shape(Int{}, Int{}))); + using SmemLayoutAtomPdStransposedNoSwizzle = Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomPdStransposed = decltype( + composition(Swizzle{}, SmemLayoutAtomPdStransposedNoSwizzle{})); + using SmemLayoutPdStransposed = decltype(tile_to_shape( + SmemLayoutAtomPdStransposed{}, + make_shape(Int{}, Int{}))); + using SmemLayoutPdStransposedNoSwizzle = decltype(tile_to_shape( + SmemLayoutAtomPdStransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutPdStransposedNoSwizzle = decltype(SmemLayoutPdStransposed{}.layout_fn()); + using SmemCopyAtomPdS = Copy_Atom; + + using SmemLayoutAtomQdOtransposedNoSwizzle = Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomQdOtransposed = decltype( + composition(Swizzle{}, SmemLayoutAtomQdOtransposedNoSwizzle{})); + using SmemLayoutQdOtransposed = decltype(tile_to_shape( + SmemLayoutAtomQdOtransposed{}, + make_shape(Int{}, Int{}))); + using SmemLayoutQdOtransposedNoSwizzle = decltype(tile_to_shape( + SmemLayoutAtomQdOtransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutQdOtransposedNoSwizzle = decltype(SmemLayoutQdOtransposed{}.layout_fn()); + + using SmemLayoutAtomdKV = decltype( + composition(Swizzle{}, + Layout>, + Stride, _1>>{})); + using SmemLayoutdKV = decltype(tile_to_shape( + SmemLayoutAtomdKV{}, + make_shape(Int{}, Int{}))); + using SmemCopyAtomdKV = Copy_Atom; + + using SmemLayoutAtomdQ = decltype( + composition(Swizzle{}, + Layout>, + Stride, _1>>{})); + using SmemLayoutdQ = decltype(tile_to_shape( + SmemLayoutAtomdQ{}, + make_shape(Int{}, Int{}))); + using SmemCopyAtomdQ = Copy_Atom; + + static constexpr int kSmemQdOCount = size(SmemLayoutQdO{}) * (No_double_buffer ? 2 : 3); // Double buffer for sQ + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemdSCount = size(SmemLayoutPdS{}); + static constexpr int kSmemPCount = size(SmemLayoutPdS{}); + static constexpr int kSmemdQCount = size(SmemLayoutdQ{}); + static constexpr int kSmemQdOSize = kSmemQdOCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemdSSize = kSmemdSCount * sizeof(Element); + static constexpr int kSmemPSize = kSmemPCount * sizeof(Element); + static constexpr int kSmemdQSize = kSmemdQCount * sizeof(Element); + static constexpr int kSmemSize = kSmemQdOSize + + (!Is_V_in_regs + ? kSmemKVSize + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize) + : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize))); + static constexpr int kSmemSize1colblock = kSmemQdOSize + + (!Is_V_in_regs + ? kSmemKVSize + kSmemdSSize + kSmemPSize + : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + kSmemPSize)); + static constexpr int kSmemSize1rowblock = kSmemQdOSize / 3 * 2 + kSmemKVSize / 2 * 3 + + kSmemdSSize + kSmemPSize; + + + static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem instead of kHeadDim here to avoid bank conflicts, but doesn't seem + // to affect speed in practice. 
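+ // Worked numbers (a sketch, assuming fp16): kGmemElemsPerLoad = 16 B / 2 B
+ // = 8 elements per 128-bit load, and with kBlockKSmem = 64 this gives
+ // kGmemThreadsPerRow = 64 / 8 = 8 threads covering one 64-element row.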
+ static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading + // from the same address by the same threadblock. This is slightly faster. + using Gmem_copy_struct = std::conditional_t< + Has_cp_async, + SM80_CP_ASYNC_CACHEGLOBAL, + DefaultCopy + >; + using GmemTiledCopyQKV = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopydO = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydKV = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydQ = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemLayoutAtomdQaccum = std::conditional_t< + kBlockKSmem == 32, + Layout, // Thread layout, 8 threads per row + Stride< _8, _1>>, + Layout, // Thread layout, 16 threads per row + Stride< _16, _1>> + >; + using GmemTiledCopydQaccum = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtomdQaccum{}, + Layout>{})); // Val layout, 4 vals per store + + using GmemTiledCopydQaccumAtomicAdd = decltype( + make_tiled_copy(Copy_Atom{}, + Layout, // Thread layout, 8 threads per row + Stride<_32, _1>>{}, + Layout>{})); // Val layout, 1 val per store + +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h new file mode 100644 index 000000000..e07f38390 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h @@ -0,0 +1,159 @@ +/****************************************************************************** + * Copyright (c) 2023, Tri Dao. 
+ ******************************************************************************/ + +#pragma once + +#include "cute/algorithm/copy.hpp" + +#include "cutlass/cutlass.h" +#include "cutlass/layout/layout.h" +#include + +using namespace cute; + +template +struct Flash_kernel_traits_sm90 { + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using Element = elem_type; + static constexpr bool Has_cp_async = true; +#else + using Element = cutlass::half_t; + static constexpr bool Has_cp_async = false; +#endif + + using ElementAccum = float; + using index_t = uint32_t; + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using MMA_Atom_Arch = std::conditional_t< + std::is_same_v, + MMA_Atom, + MMA_Atom + >; + using ValLayoutMNK = Layout>; +#else + using MMA_Atom_Arch = MMA_Atom; + using ValLayoutMNK = Layout>; +#endif + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; +#else + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; +#endif +}; + +template > +struct Flash_fwd_kernel_traits : public Base { + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; + static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + using TiledMma = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout,_1,_1>>, // 4x1x1 or 8x1x1 thread group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM + + using SmemLayoutAtomQ = decltype( + composition(Swizzle{}, + // This has to be kBlockKSmem, using kHeadDim gives wrong results for d=128 + Layout>, + Stride, _1>>{})); + using SmemLayoutQ = decltype(tile_to_shape( + SmemLayoutAtomQ{}, + Shape, Int>{})); + + using SmemLayoutKV = decltype(tile_to_shape( + SmemLayoutAtomQ{}, + Shape, Int>{})); + + using SmemLayoutAtomVtransposed = decltype( + composition(Swizzle{}, + // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 + Layout, Int>, + Stride<_1, Int>>{})); + using SmemLayoutVtransposed = decltype(tile_to_shape( + SmemLayoutAtomVtransposed{}, + Shape, Int>{})); + // Maybe the VtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? 
+ using SmemLayoutVtransposedNoSwizzle = decltype(SmemLayoutVtransposed{}.layout_fn()); + + using SmemLayoutAtomO = decltype( + composition(Swizzle{}, + Layout, Int>, + Stride, _1>>{})); + using SmemLayoutO = decltype(tile_to_shape( + SmemLayoutAtomO{}, + Shape, Int>{})); + using SmemCopyAtomO = Copy_Atom; + + static constexpr int kSmemQCount = size(SmemLayoutQ{}); + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemSize = Share_Q_K_smem ? std::max(kSmemQSize, kSmemKVSize) : kSmemQSize + kSmemKVSize; + + static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because of bank conflicts. + // For example, for d=128, smem is split into 2 "pages", each page takes care of columns + // 0-63 and 64-127. If we have 16 threads per row for gmem read, when we write to smem, + // thread 0 - 7 will write to the first page and thread 8 - 15 will write to the second page, + // to the same banks. + static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading + // from the same address by the same threadblock. This is slightly faster. + using Gmem_copy_struct = std::conditional_t< + Has_cp_async, + SM80_CP_ASYNC_CACHEGLOBAL, + DefaultCopy + >; + using GmemTiledCopyQKV = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopyO = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); + using GmemLayoutAtomP = Layout, Int>, + Stride, _1>>; + + using GmemTiledCopyP = decltype( + make_tiled_copy(Copy_Atom{}, + GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store + +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/philox.cuh b/external_libs/runtime/flash_attn/lib/philox.cuh similarity index 97% rename from runtime/lib/backends/cuda/providers/default/flash_attn/kernels/philox.cuh rename to external_libs/runtime/flash_attn/lib/philox.cuh index 0d9e6632e..6ce1440f2 100644 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/philox.cuh +++ b/external_libs/runtime/flash_attn/lib/philox.cuh @@ -1,9 +1,7 @@ // Pytorch also has an implementation of Philox RNG: https://github.com/pytorch/pytorch/blob/8ca3c881db3e3510fcb7725389f6a0633c9b992c/torch/csrc/jit/tensorexpr/cuda_random.h #pragma once // Philox CUDA. 
-namespace brt { -namespace cuda { -namespace kernel { + namespace flash { struct ull2 { @@ -165,6 +163,3 @@ private: }; } // namespace -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/external_libs/runtime/flash_attn/lib/softmax.h b/external_libs/runtime/flash_attn/lib/softmax.h new file mode 100644 index 000000000..09a93f145 --- /dev/null +++ b/external_libs/runtime/flash_attn/lib/softmax.h @@ -0,0 +1,283 @@ +/****************************************************************************** + * Copyright (c) 2023, Tri Dao. + ******************************************************************************/ + +#pragma once + +#include + +#include + +#include + +#include "philox.cuh" +#include "utils.h" + +namespace flash { + +using namespace cute; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +__device__ inline void thread_reduce_(Tensor const &tensor, Tensor &summary, Operator &op) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(summary) == size<0>(tensor)); + #pragma unroll + for (int mi = 0; mi < size<0>(tensor); mi++) { + summary(mi) = zero_init ? tensor(mi, 0) : op(summary(mi), tensor(mi, 0)); + #pragma unroll + for (int ni = 1; ni < size<1>(tensor); ni++) { + summary(mi) = op(summary(mi), tensor(mi, ni)); + } + } +} + +template +__device__ inline void quad_allreduce_(Tensor &dst, Tensor &src, Operator &op) { + CUTE_STATIC_ASSERT_V(size(dst) == size(src)); + #pragma unroll + for (int i = 0; i < size(dst); i++){ + dst(i) = Allreduce<4>::run(src(i), op); + } +} + +template +__device__ inline void reduce_(Tensor const& tensor, Tensor &summary, Operator &op) { + thread_reduce_(tensor, summary, op); + quad_allreduce_(summary, summary, op); +} + +template +__device__ inline void reduce_max(Tensor const& tensor, Tensor &max){ + MaxOp max_op; + reduce_(tensor, max, max_op); +} + +template +__device__ inline void reduce_sum(Tensor const& tensor, Tensor &sum){ + SumOp sum_op; + reduce_(tensor, sum, sum_op); +} + +// Apply the exp to all the elements. +template +inline __device__ void scale_apply_exp2(Tensor &tensor, Tensor const &max, const float scale) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); + #pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + // If max is -inf, then all elements must have been -inf (possibly due to masking). + // We don't want (-inf - (-inf)) since that would give NaN. + // If we don't have float around M_LOG2E the multiplication is done in fp64. + const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * (Scale_max ? scale : float(M_LOG2E)); + #pragma unroll + for (int ni = 0; ni < size<1>(tensor); ++ni) { + // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - + // max * log_2(e)) This allows the compiler to use the ffma + // instruction instead of fadd and fmul separately. + tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); + } + } +} + +// Apply the exp to all the elements. 
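+// The exp2 rewrite used here and in scale_apply_exp2 above (a sketch;
+// M_LOG2E = log2(e)):
+//   exp(x - m) = 2^((x - m) * log2(e)) = exp2f(x * scale - m * scale)
+// where the caller is assumed to fold softmax_scale * M_LOG2E into `scale`,
+// so the subtract-and-multiply becomes a single FFMA feeding exp2f.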
+template +inline __device__ void max_scale_exp2_sum(Tensor &tensor, Tensor &max, Tensor &sum, const float scale) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); + #pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + MaxOp max_op; + max(mi) = zero_init ? tensor(mi, 0) : max_op(max(mi), tensor(mi, 0)); + #pragma unroll + for (int ni = 1; ni < size<1>(tensor); ni++) { + max(mi) = max_op(max(mi), tensor(mi, ni)); + } + max(mi) = Allreduce<4>::run(max(mi), max_op); + // If max is -inf, then all elements must have been -inf (possibly due to masking). + // We don't want (-inf - (-inf)) since that would give NaN. + const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale; + sum(mi) = 0; + #pragma unroll + for (int ni = 0; ni < size<1>(tensor); ++ni) { + // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - + // max * log_2(e)) This allows the compiler to use the ffma + // instruction instead of fadd and fmul separately. + tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); + sum(mi) += tensor(mi, ni); + } + SumOp sum_op; + sum(mi) = Allreduce<4>::run(sum(mi), sum_op); + } +} + +template +inline __device__ void apply_mask(Tensor &tensor, const int max_seqlen_k, + const int col_idx_offset_ = 0) { + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout::rank == 2, "Only support 2D Tensor"); + const int lane_id = threadIdx.x % 32; + const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; + #pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; + #pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; + if (col_idx >= max_seqlen_k) { + // Without the "make_coord" we get wrong results + #pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + tensor(mi, make_coord(j, nj)) = -INFINITY; + } + } + } + } +} + +template +inline __device__ void apply_mask_local(Tensor &tensor, const int col_idx_offset_, + const int max_seqlen_k, const int row_idx_offset, + const int max_seqlen_q, const int warp_row_stride, + const int window_size_left, const int window_size_right) { + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout::rank == 2, "Only support 2D Tensor"); + const int lane_id = threadIdx.x % 32; + const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; + #pragma unroll + for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { + const int row_idx_base = row_idx_offset + mi * warp_row_stride; + #pragma unroll + for (int i = 0; i < size<0, 0>(tensor); ++i) { + const int row_idx = row_idx_base + i * 8; + const int col_idx_limit_left = std::max(0, row_idx + max_seqlen_k - max_seqlen_q - window_size_left); + const int col_idx_limit_right = std::min(max_seqlen_k, row_idx + 1 + max_seqlen_k - max_seqlen_q + window_size_right); + #pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; + #pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; + if (col_idx >= col_idx_limit_right || (HasWSLeft && col_idx < col_idx_limit_left)) { + tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY; + } + } + } + // if (cute::thread0()) { + // printf("mi = %d, i = %d, row_idx = %d, max_seqlen_k = %d\n", mi, i, row_idx, max_seqlen_k); + // 
print(tensor(make_coord(i, mi), _)); + // // print(tensor(_, j + nj * size<1, 0>(tensor))); + // } + } + } +} + +template +inline __device__ void apply_mask_causal(Tensor &tensor, const int col_idx_offset_, + const int max_seqlen_k, const int row_idx_offset, + const int max_seqlen_q, const int warp_row_stride) { + // Causal masking is equivalent to local masking with window_size_left = infinity and window_size_right = 0 + apply_mask_local(tensor, col_idx_offset_, max_seqlen_k, row_idx_offset, + max_seqlen_q, warp_row_stride, -1, 0); +} + +template +inline __device__ void apply_mask_causal_w_idx( + Tensor &tensor, Tensor const &idx_rowcol, + const int col_idx_offset_, const int max_seqlen_k, const int row_idx_offset) +{ + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 2, "Only support 2D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(tensor) == size<0>(idx_rowcol)); + CUTE_STATIC_ASSERT_V(size<1>(tensor) == size<1>(idx_rowcol)); + #pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + const int col_idx_limit = std::min(max_seqlen_k, 1 + row_idx_offset + get<0>(idx_rowcol(mi, 0))); + #pragma unroll + for (int ni = 0; ni < size<1, 1>(tensor); ++ni) { + if (col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) { + tensor(mi, ni) = -INFINITY; + } + } + // if (cute::thread0()) { + // printf("ni = %d, j = %d, col_idx = %d, max_seqlen_k = %d\n", ni, j, col_idx, max_seqlen_k); + // print(tensor(_, make_coord(j, ni))); + // // print(tensor(_, j + ni * size<1, 0>(tensor))); + // } + } +} + +template +inline __device__ void apply_dropout(Tensor &tensor, uint8_t p_dropout_in_uint8_t, + unsigned long long seed, unsigned long long offset, + int block_row_start, int block_col_start, + int block_row_stride) { + // tensor has shape (8, MMA_M, MMA_N / 2) + using T = typename Engine::value_type; + auto encode_dropout = [](bool keep, T val) { + return keep ? val : (encode_dropout_in_sign_bit ? -val : T(0)); + }; + static_assert(decltype(size<2>(tensor))::value % 2 == 0); + const uint16_t p_dropout_8bit_in_uint16_t = uint16_t(p_dropout_in_uint8_t); + const uint32_t p_dropout_8bit_in_uint32_t = (uint32_t(p_dropout_8bit_in_uint16_t) << 16) | uint32_t(p_dropout_8bit_in_uint16_t); + // if (cute::thread0()) { printf("threshold2 = 0x%x\n", p_dropout_8bit_in_uint32_t); } + #pragma unroll + for (int m = 0; m < size<1>(tensor); ++m, block_row_start += block_row_stride) { + uint2 rowcol = make_uint2(block_row_start, block_col_start); + #pragma unroll + for (int n = 0; n < size<2>(tensor) / 2; ++n, ++rowcol.y) { + // if (cute::thread(32, 0)) { printf("m = %d, n = %d, row = %d, col = %d\n", m, n, int(rowcol.x), int(rowcol.y));} + uint4 random_uint4 = flash::philox(seed, reinterpret_cast(rowcol), offset); + // if (cute::thread0()) { printf("philox = %u, %d, %d, %d\n", random_uint4.x, random_uint4.y, random_uint4.z, random_uint4.w);} + uint8_t (&rnd_8)[16] = reinterpret_cast(random_uint4); + // Special implementation for 16-bit types: we duplicate the threshold to the + // low and high 16 bits of a 32-bit value, then use the f16x2 comparison instruction + // to get a mask. The low 16 bits of the mask will be either 0xffff or 0x0000, + // and the high 16 bits will be either 0xffff or 0x0000, depending on whether + // the random value is less than the threshold. + // We then do a bit-wise AND between the mask and the original value (in 32-bit). 
+        // We're exploiting the fact that floating point comparison is equivalent to integer
+        // comparison, since we're comparing unsigned integers whose top 8-bits are zero.
+        if (!encode_dropout_in_sign_bit
+            && (std::is_same<T, cutlass::half_t>::value || std::is_same<T, cutlass::bfloat16_t>::value)) {
+            uint16_t rnd_16[16];
+            #pragma unroll
+            for (int i = 0; i < 16; i++) { rnd_16[i] = uint16_t(rnd_8[i]); }
+            uint32_t (&rnd_32)[8] = reinterpret_cast<uint32_t (&)[8]>(rnd_16);
+            #pragma unroll
+            for (int j = 0; j < 2; j++) {
+                Tensor tensor_uint32 = recast<uint32_t>(tensor(_, m, n * 2 + j));
+                // if (cute::thread0()) { printf("random = 0x%x, 0x%x, 0x%x, 0x%x\n", rnd_32[j * 4 + 0], rnd_32[j * 4 + 1], rnd_32[j * 4 + 2], rnd_32[j * 4 + 3]); }
+                // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); }
+                #pragma unroll
+                for (int i = 0; i < 4; i++) {
+                    uint32_t mask;
+                    asm volatile("set.le.u32.f16x2 %0, %1, %2;\n" : "=r"(mask) : "r"(rnd_32[j * 4 + i]), "r"(p_dropout_8bit_in_uint32_t));
+                    tensor_uint32(i) &= mask;
+                }
+                // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); }
+            }
+        } else {
+            #pragma unroll
+            for (int j = 0; j < 2; j++) {
+                #pragma unroll
+                for (int i = 0; i < 8; i++) {
+                    tensor(i, m, n * 2 + j) = encode_dropout(rnd_8[j * 8 + i] <= p_dropout_in_uint8_t, tensor(i, m, n * 2 + j));
+                }
+                Tensor tensor_uint32 = recast<uint32_t>(tensor(_, m, n * 2 + j));
+                // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); }
+            }
+        }
+        // // if ((threadIdx.x == 0) && (blockIdx.x == 0) && (blockIdx.y == 0)) {
+        // //     printf("n = %d, ph Philox: %u, %u, %u, %u\n", n, rnd_8.x, rnd_8.y, rnd_8.z, rnd_8.w);
+        // // }
+    }
+}
+
+} // namespace flash
diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/static_switch.h b/external_libs/runtime/flash_attn/lib/static_switch.h
similarity index 100%
rename from runtime/lib/backends/cuda/providers/default/flash_attn/kernels/static_switch.h
rename to external_libs/runtime/flash_attn/lib/static_switch.h
diff --git a/external_libs/runtime/flash_attn/lib/utils.h b/external_libs/runtime/flash_attn/lib/utils.h
new file mode 100644
index 000000000..edf6a60a7
--- /dev/null
+++ b/external_libs/runtime/flash_attn/lib/utils.h
@@ -0,0 +1,521 @@
+/******************************************************************************
+ * Copyright (c) 2023, Tri Dao.
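The per-element contract that apply_dropout vectorizes above is small enough to state directly. A scalar sketch, with a hypothetical threshold byte (the real one is derived from dropout_p by host-side setup code not shown in this hunk):

    #include <cstdint>
    #include <cstdio>

    // Dropped values are either zeroed or sign-flipped; the sign-bit variant
    // lets the backward pass recover the dropout mask from the sign alone.
    template <bool encode_dropout_in_sign_bit>
    float encode_dropout(bool keep, float val) {
      return keep ? val : (encode_dropout_in_sign_bit ? -val : 0.0f);
    }

    int main() {
      const uint8_t threshold = 200; // hypothetical keep-threshold byte
      const uint8_t rnd = 123;       // one byte of Philox output
      // Keep iff rnd <= threshold, the same comparison the f16x2 trick batches.
      std::printf("%g\n", encode_dropout<true>(rnd <= threshold, 1.5f));
      return 0;
    }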
+ ******************************************************************************/ + +#pragma once + +#include +#include +#include + +#include + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 +#include +#endif + +#include +#include + +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace flash { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ uint32_t relu2(const uint32_t x); + +template<> +inline __device__ uint32_t relu2(const uint32_t x) { + uint32_t res; + const uint32_t zero = 0u; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + asm volatile("max.f16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); +#else + asm volatile( \ + "{\n" \ + "\t .reg .f16x2 sela;\n" \ + "\t set.gtu.u32.f16x2 sela, %1, %2;\n" \ + "\t and.b32 %0, sela, %1;\n" + "}\n" : "=r"(res) : "r"(x), "r"(zero)); +#endif + return res; +} + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 +template<> +inline __device__ uint32_t relu2(const uint32_t x) { + uint32_t res; + const uint32_t zero = 0u; + asm volatile("max.bf16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); + return res; +} +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + +template +inline __device__ uint32_t convert_relu2(const float2 x); + +template<> +inline __device__ uint32_t convert_relu2(const float2 x) { + uint32_t res; + const uint32_t a = reinterpret_cast(x.x); + const uint32_t b = reinterpret_cast(x.y); + asm volatile("cvt.rn.relu.f16x2.f32 %0, %1, %2;\n" : "=r"(res) : "r"(b), "r"(a)); + return res; +} + +template<> +inline __device__ uint32_t convert_relu2(const float2 x) { + uint32_t res; + const uint32_t a = reinterpret_cast(x.x); + const uint32_t b = reinterpret_cast(x.y); + asm volatile("cvt.rn.relu.bf16x2.f32 %0, %1, %2;\n" : "=r"(res) : "r"(b), "r"(a)); + return res; +} + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct MaxOp { +__device__ inline T operator()(T const & x, T const & y) { return x > y ? 
x : y; } +}; + +template <> +struct MaxOp { +// This is slightly faster +__device__ inline float operator()(float const &x, float const &y) { return max(x, y); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct SumOp { +__device__ inline T operator()(T const & x, T const & y) { return x + y; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Allreduce { + static_assert(THREADS == 32 || THREADS == 16 || THREADS == 8 || THREADS == 4); + template + static __device__ inline T run(T x, Operator &op) { + constexpr int OFFSET = THREADS / 2; + x = op(x, __shfl_xor_sync(uint32_t(-1), x, OFFSET)); + return Allreduce::run(x, op); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template<> +struct Allreduce<2> { +template +static __device__ inline T run(T x, Operator &op) { + x = op(x, __shfl_xor_sync(uint32_t(-1), x, 1)); + return x; +} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void gemm(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const& tCsA, + Tensor4 const& tCsB, TiledMma tiled_mma, + TiledCopyA smem_tiled_copy_A, TiledCopyB smem_tiled_copy_B, + ThrCopyA smem_thr_copy_A, ThrCopyB smem_thr_copy_B) { + CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M + CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N + CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K + Tensor tCrA_copy_view = smem_thr_copy_A.retile_D(tCrA); + CUTE_STATIC_ASSERT_V(size<1>(tCsA) == size<1>(tCrA_copy_view)); // M + Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); + CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N + if (!A_in_regs) { cute::copy(smem_tiled_copy_A, tCsA(_, _, _0{}), tCrA_copy_view(_, _, _0{})); } + if (!B_in_regs) { cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); } + #pragma unroll + for (int i = 0; i < size<2>(tCrA); ++i) { + if (i < size<2>(tCrA) - 1) { + if (!A_in_regs) { cute::copy(smem_tiled_copy_A, tCsA(_, _, i + 1), tCrA_copy_view(_, _, i + 1)); } + if (!B_in_regs) { cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), tCrB_copy_view(_, _, i + 1)); } + } + cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void gemm_A_in_regs(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const& tCsB, + TiledMma tiled_mma, TiledCopy smem_tiled_copy_B, + ThrCopy smem_thr_copy_B) { + CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M + CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N + CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K + Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); + CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N + cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); + #pragma unroll + for (int i = 0; i < size<2>(tCrA); ++i) { + if (i < size<2>(tCrA) - 1) { + cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), tCrB_copy_view(_, _, i + 1)); + } + cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Convert acc_layout from (MMA=4, MMA_M, MMA_N) to 
(nrow=(2, MMA_M), ncol=(2, MMA_N)) +template +inline __device__ auto convert_layout_acc_rowcol(Layout acc_layout) { + static_assert(decltype(size<0>(acc_layout))::value == 4); + static_assert(decltype(rank(acc_layout))::value == 3); + auto l = logical_divide(acc_layout, Shape<_2>{}); // ((2, 2), MMA_M, MMA_N) + // TD [2023-08-13]: Idk why but get<0, 1>(l) doesn't work for Cutlass 3.2, I'm getting + // "int_tuple.hpp(74): error: conversion to inaccessible base class" + // return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, 0>(l), get<2>(l))); + return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), make_layout(get<0>(get<0>(l)), get<2>(l))); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Convert rowcol_layout from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, MMA_N / 2) +// if using m16n8k16, or to ((2, 2, 1), MMA_M, MMA_N) if using m16n8k8. +template +inline __device__ auto convert_layout_rowcol_Aregs(Layout rowcol_layout) { + using X = Underscore; + static_assert(decltype(size<0, 0>(rowcol_layout))::value == 2); + static_assert(decltype(size<1, 0>(rowcol_layout))::value == 2); + constexpr int mma_shape_K = get<2>(typename MMA_traits::Shape_MNK{}); + static_assert(mma_shape_K == 8 || mma_shape_K == 16); + constexpr int MMA_N_divisor = mma_shape_K == 8 ? 1 : 2; + auto l = logical_divide(rowcol_layout, Shape>>{}); // ((2, MMA_M), (2, (2, MMA_N / 2))) + // TD [2023-08-13]: Same error as above on Cutlass 3.2 + // return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, 0>(l)), + // get<0, 1>(l), + // get<1, 1, 1>(l)); + return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), get<0>(get<1>(get<1>(l)))), + get<1>(get<0>(l)), + get<1>(get<1>(get<1>(l)))); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ auto convert_type(Tensor const &tensor) { + using From_type = typename Engine::value_type; + constexpr int numel = decltype(size(tensor))::value; + cutlass::NumericArrayConverter convert_op; + // HACK: this requires tensor to be "contiguous" + auto frag = convert_op(*reinterpret_cast *>(tensor.data())); + return make_tensor(make_rmem_ptr(&frag), tensor.layout()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void relu_(Tensor &tensor) { + constexpr int numel = decltype(size(tensor))::value; + static_assert(numel % 2 == 0); + using value_t = typename Engine::value_type; + // HACK: this requires tensor to be "contiguous" + Tensor tensor_uint32 = recast(tensor); + #pragma unroll + for (int i = 0; i < size(tensor_uint32); ++i) { + tensor_uint32(i) = relu2(tensor_uint32(i)); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// On SM80 and above, we can fuse fp32 -> fp16/bf16 conversion and relu into 1 instruction +template +inline __device__ auto convert_type_relu(Tensor const &tensor) { + using From_type = typename Engine::value_type; + static_assert(std::is_same_v || std::is_same_v); + static_assert(std::is_same_v); + constexpr int numel = decltype(size(tensor))::value; + static_assert(numel % 2 == 0); +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + // HACK: this requires tensor to be "contiguous" + Tensor tensor_float2 = recast(tensor); + Tensor out_uint32 = make_tensor(tensor_float2.layout()); + #pragma unroll + for 
(int i = 0; i < size(out_uint32); ++i) { + out_uint32(i) = convert_relu2(tensor_float2(i)); + } + Tensor out = make_tensor(make_rmem_ptr(out_uint32.data()), tensor.layout()); +#else + Tensor out = flash::convert_type(tensor); + flash::relu_(out); +#endif + return out; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Blocks until all but N previous cp.async.commit_group operations have committed. +// This differs from cute::cp_async_wait in that when N = 0 we don't call cp.async.wait_all +// (which is equivalent to commit_group then wait_group 0). +// Instead we just call cp.async.wait_group 0, which is slightly faster. +// https://github.com/NVIDIA/cutlass/blob/master/include/cute/arch/copy_sm80.hpp#L113 +template +CUTE_HOST_DEVICE +void cp_async_wait() { +#if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED) + asm volatile("cp.async.wait_group %0;\n" :: "n"(N)); +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void copy(TiledCopy tiled_copy, Tensor const &S, + Tensor &D, Tensor const &identity_MN, + Tensor const &predicate_K, const int max_MN=0) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + // There's no case where !Clear_OOB_K && Clear_OOB_MN + static_assert(!(Clear_OOB_MN && !Clear_OOB_K)); + #pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { + #pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || predicate_K(k)) { + cute::copy(tiled_copy, S(_, m, k), D(_, m, k)); + } else if (Clear_OOB_K) { + cute::clear(D(_, m, k)); + } + } + } else if (Clear_OOB_MN) { + cute::clear(D(_, m, _)); + } + } + // TD [2023-04-13]: Strange that the code below can cause race condition. + // I think it's because the copies are under an if statement. 
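The predication in flash::copy above reduces to a row/column rule that is easy to model on the host. A sketch with illustrative bounds standing in for identity_MN and predicate_K:

    #include <cstdio>

    int main() {
      const int M = 4, K = 4;
      const int max_MN = 3;  // rows at or past this index are out of bounds
      const int valid_K = 3; // columns failing predicate_K
      float dst[4][4];
      for (int m = 0; m < M; ++m) {
        for (int k = 0; k < K; ++k) {
          const bool row_ok = m < max_MN;  // Is_even_MN || identity_MN check
          const bool col_ok = k < valid_K; // Is_even_K || predicate_K(k)
          // Copy in-bounds elements; clear out-of-bounds ones (Clear_OOB_*).
          dst[m][k] = (row_ok && col_ok) ? 1.0f : 0.0f;
        }
      }
      for (int m = 0; m < M; ++m)
        std::printf("%g %g %g %g\n", dst[m][0], dst[m][1], dst[m][2], dst[m][3]);
      return 0;
    }

Clearing rather than skipping matters because the consumer (the GEMM) reads the full tile; stale shared-memory contents would otherwise leak into the matmul.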
+ // if (Is_even_K) { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { + // copy(tiled_copy, S(_, m, _), D(_, m, _)); + // } else if (Clear_OOB_MN) { + // clear(D(_, m, _)); + // } + // } + // } else { // It's slightly faster in this case if iterate over K first + // #pragma unroll + // for (int k = 0; k < size<2>(S); ++k) { + // if (predicate_K(k)) { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { + // copy(tiled_copy, S(_, m, k), D(_, m, k)); + // } else if (Clear_OOB_MN) { + // clear(D(_, m, k)); + // } + // } + // } else if (Clear_OOB_K) { // There's no case where !Clear_OOB_K && Clear_OOB_MN + // if (Clear_OOB_MN || Is_even_MN) { + // clear(D(_, _, k)); + // } else { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (!(Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN)) { + // clear(D(_, m, k)); + // } + // } + // } + // } + // } + // } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void copy_w_min_idx(Tensor const &S, + Tensor &D, Tensor const &identity_MN, + Tensor const &predicate_K, + const int max_MN=0, const int min_MN=0) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, max_MN = %d, min_MN = %d\n", blockIdx.y, max_MN, min_MN); } + #pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } + if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { + // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("Inner loop, blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } + #pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || predicate_K(k)) { + cute::copy(S(_, m, k), D(_, m, k)); + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void copy_rotary_interleaved(Tensor const &S, + Tensor &D, + Tensor const &Cos, + Tensor const &Sin, + Tensor const &identity_MN, + const int max_MN, const int min_MN, + const int dim, const int rotary_dim) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K + CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); // MMA_K + static_assert(decltype(size<0>(S))::value == decltype(size<0>(Cos))::value * 2); + static_assert(decltype(size<0>(Cos))::value % 2 == 0); // Since we do fast conversion from fp16/bf16 to fp32 + Tensor rCos = make_fragment_like(Cos); + Tensor rSin = make_fragment_like(Sin); + Tensor rS = make_fragment_like(S); + #pragma unroll + for (int m = 
0; m < size<1>(S); ++m) { + if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { + #pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { + cute::copy(S(_, m, k), rS(_, m, k)); + if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { + cute::copy(Cos(_, m, k), rCos(_, m, k)); + cute::copy(Sin(_, m, k), rSin(_, m, k)); + Tensor S_fp32 = convert_type(rS(_, m, k)); + Tensor cos_fp32 = convert_type(rCos(_, m, k)); + Tensor sin_fp32 = convert_type(rSin(_, m, k)); + #pragma unroll + for (int i = 0; i < size<0>(rS) / 2; ++i) { + float real = S_fp32(2 * i) * cos_fp32(i) - S_fp32(2 * i + 1) * sin_fp32(i); + float imag = S_fp32(2 * i) * sin_fp32(i) + S_fp32(2 * i + 1) * cos_fp32(i); + S_fp32(2 * i) = real; + S_fp32(2 * i + 1) = imag; + } + // Idk but I need to copy for the convert_type to work + Tensor S_fp32_copy = make_fragment_like(S_fp32); + cute::copy(S_fp32, S_fp32_copy); + using T = typename Engine0::value_type; + Tensor S_og_type = convert_type(S_fp32_copy); + cute::copy(S_og_type, rS(_, m, k)); + } + cute::copy(rS(_, m, k), D(_, m, k)); + } else if (Clear_OOB_K) { + cute::clear(D(_, m, k)); + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline __device__ void copy_rotary_contiguous(Tensor const &S, + Tensor &D, + Tensor const &Cos, + Tensor const &Sin, + Tensor const &identity_MN, + const int max_MN, const int min_MN, + const int dim, const int rotary_dim) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(Cos)); // MMA + CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); + static_assert(decltype(size<0>(Cos))::value % 2 == 0); // Since we do fast conversion from fp16/bf16 to fp32 + Tensor rCos = make_fragment_like(Cos); + Tensor rSin = make_fragment_like(Sin); + Tensor rS = make_fragment_like(S); + Tensor rS_other = make_fragment_like(rS(_, 0, 0)); + #pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { + #pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { + cute::copy(S(_, m, k), rS(_, m, k)); + if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { + const bool is_left = get<1>(identity_MN(0, 0, k)) < rotary_dim / 2; + Tensor gS_other = make_tensor(S(_, m, k).data() + (is_left ? rotary_dim / 2 : -rotary_dim / 2), S(_, m, k).layout()); + cute::copy(gS_other, rS_other); + // if (cute::thread0()) { print_tensor(rS(_, m, k)); print_tensor(rS_other); } + Tensor gCos = make_tensor(Cos(_, m, k).data() + (is_left ? 0 : -rotary_dim / 2), Cos(_, m, k).layout()); + Tensor gSin = make_tensor(Sin(_, m, k).data() + (is_left ? 
0 : -rotary_dim / 2), Sin(_, m, k).layout()); + cute::copy(gCos, rCos(_, m, k)); + cute::copy(gSin, rSin(_, m, k)); + // if (cute::thread0()) { print_tensor(rCos(_, m, k)); print_tensor(rSin(_, m, k)); } + Tensor S_fp32 = convert_type(rS(_, m, k)); + Tensor S_other_fp32 = convert_type(rS_other); + Tensor cos_fp32 = convert_type(rCos(_, m, k)); + Tensor sin_fp32 = convert_type(rSin(_, m, k)); + #pragma unroll + for (int i = 0; i < size<0>(rS); ++i) { + S_fp32(i) = S_fp32(i) * cos_fp32(i) + S_other_fp32(i) * (is_left ? -sin_fp32(i) : sin_fp32(i)); + } + // Idk but I need to copy for the convert_type to work + Tensor S_fp32_copy = make_fragment_like(S_fp32); + cute::copy(S_fp32, S_fp32_copy); + using T = typename Engine0::value_type; + Tensor S_og_type = convert_type(S_fp32_copy); + cute::copy(S_og_type, rS(_, m, k)); + // if (cute::thread0()) { print_tensor(rS(_, m, k)); } + } + cute::copy(rS(_, m, k), D(_, m, k)); + } else if (Clear_OOB_K) { + cute::clear(D(_, m, k)); + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace flash diff --git a/runtime/cmake/CMakeLists.txt b/runtime/cmake/CMakeLists.txt index af719dd09..e435b01d8 100644 --- a/runtime/cmake/CMakeLists.txt +++ b/runtime/cmake/CMakeLists.txt @@ -39,30 +39,10 @@ option(brt_CROSS_COMPILING "Cross compiling for another platform" OFF) # Parameters for LLVM set(LLVM_INSTALL_PATH "" CACHE STRING "The path to the installed LLVM library") -set(FLASH_ATTN_INSTALL_PATH "" CACHE STRING "The path to the installed flash attn library") -if(FLASH_ATTN_INSTALL_PATH AND NOT brt_USE_CUDA) - message(FATAL_ERROR "config FLASH_ATTN_INSTALL_PATH=... must with brt_USE_CUDA=ON") -endif() - if(brt_USE_NCCL AND NOT brt_USE_CUDA) message(FATAL_ERROR "brt_USE_NCCL=ON must with brt_USE_CUDA=ON") endif() - -set(brt_ENABLE_FLASH_ATTENTION false) -if(FLASH_ATTN_INSTALL_PATH) - set(brt_ENABLE_FLASH_ATTENTION true) -endif() - -option(brt_BUILD_FLASH_ATTN "build flash attention shared library" OFF) - -if(brt_BUILD_FLASH_ATTN) - if(FLASH_ATTN_INSTALL_PATH) - message(FATAL_ERROR "config FLASH_ATTN_INSTALL_PATH=... 
must with brt_BUILD_FLASH_ATTN=OFF") - endif() - set(brt_ENABLE_FLASH_ATTENTION true) -endif() - # Optimizations Related option(brt_ENABLE_LTO "Enable LTO" OFF) @@ -243,9 +223,6 @@ set(brt_LINK_DIRS ) # link CUDADNN if(brt_USE_CUDA) message(STATUS "brt_USE_CUDA On") - if (brt_ENABLE_FLASH_ATTENTION) - message(STATUS "brt_ENABLE_FLASH_ATTENTION On") - endif() find_package(CUDAToolkit REQUIRED) include_directories("${CUDAToolkit_INCLUDE_DIRS}") message("CUDAToolkit Include Dirs = ${CUDAToolkit_INCLUDE_DIRS}") @@ -280,10 +257,6 @@ if (brt_USE_CUDA) list(APPEND BRT_PROVIDER_FLAGS -DBRT_USE_CUDA=1) list(APPEND BRT_PROVIDER_CMAKE_FLAGS -Dbrt_USE_CUDA=1) list(APPEND BRT_PROVIDER_NAMES cuda) - if (brt_ENABLE_FLASH_ATTENTION) - list(APPEND BRT_PROVIDER_FLAGS -DBRT_ENABLE_FLASH_ATTENTION=1) - list(APPEND BRT_PROVIDER_CMAKE_FLAGS -Dbrt_ENABLE_FLASH_ATTENTION=1) - endif() endif() if (brt_USE_NCCL) diff --git a/runtime/cmake/brt_provider_cuda.cmake b/runtime/cmake/brt_provider_cuda.cmake index b5a22fc6a..b3bada6d2 100644 --- a/runtime/cmake/brt_provider_cuda.cmake +++ b/runtime/cmake/brt_provider_cuda.cmake @@ -13,36 +13,6 @@ list(FILTER brt_cuda_provider_srcs EXCLUDE REGEX ".*/backends/cuda/providers/default/flash_attn/.*" ) -if (brt_BUILD_FLASH_ATTN) - file(GLOB_RECURSE brt_cuda_provider_sm_80_cuda_srcs CONFIGURE_DEPENDS - "${LIB_ROOT}/backends/cuda/providers/default/flash_attn/kernels/*.h" - "${LIB_ROOT}/backends/cuda/providers/default/flash_attn/kernels/*.cu" - ) - set_source_files_properties(${brt_cuda_provider_sm_80_cuda_srcs} - PROPERTIES COMPILE_FLAGS "-gencode=arch=compute_80,code=sm_80") - - file(GLOB_RECURSE brt_cuda_provider_sm_80_cpp_srcs CONFIGURE_DEPENDS - "${BRT_INCLUDE_DIR}/brt/backends/cuda/providers/default/flash_attn/*.h" - "${LIB_ROOT}/backends/cuda/providers/default/flash_attn/*.cc" - "${LIB_ROOT}/backends/cuda/providers/default/flash_attn/kernels/*.cc" - ) - list(APPEND brt_cuda_provider_sm_80_cuda_srcs ${brt_cuda_provider_sm_80_cpp_srcs}) - brt_add_shared_library(brt_flash_attn_cuda ${brt_cuda_provider_sm_80_cuda_srcs}) - target_include_directories(brt_flash_attn_cuda PRIVATE SYSTEM ${REPO_ROOT}/../external/half/include) - target_include_directories(brt_flash_attn_cuda PRIVATE SYSTEM "${CUTLASS_ROOT}/include" "${CUTLASS_ROOT}/tools/util/include") - # we know that all brt::* symbols are defined/linked in brt_provider_cuda library - target_link_libraries(brt_flash_attn_cuda PRIVATE -Wl,--unresolved-symbols=ignore-in-object-files) - - set_target_properties(brt_flash_attn_cuda PROPERTIES LINKER_LANGUAGE CXX) - set_target_properties(brt_flash_attn_cuda PROPERTIES FOLDER "Brt") - - install( - TARGETS brt_flash_attn_cuda - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") -endif() - - list(APPEND brt_all_providers_srcs ${brt_cuda_provider_srcs}) list(APPEND brt_all_includes brt_device_cuda) @@ -52,18 +22,6 @@ brt_add_object_library(brt_provider_cuda ${brt_cuda_provider_srcs}) # cutlass target_include_directories(brt_provider_cuda PUBLIC "${CUTLASS_ROOT}/include" "${CUTLASS_ROOT}/tools/util/include") -# add flash attention dependencies if any -if(FLASH_ATTN_INSTALL_PATH) - target_link_libraries(brt_provider_cuda ${FLASH_ATTN_INSTALL_PATH}) - install( - FILES "${FLASH_ATTN_INSTALL_PATH}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}") -endif() - -if (brt_BUILD_FLASH_ATTN) - target_link_libraries(brt_provider_cuda brt_flash_attn_cuda) -endif() - target_link_libraries(brt_provider_cuda brt_framework brt_common brt_ir) 
 target_link_libraries(brt_provider_cuda ${BRT_CUDA_LIBRARIES})
 brt_add_include_to_target(brt_provider_cuda ${brt_all_includes})
diff --git a/runtime/include/brt/backends/cuda/providers/default/flash_attn/op_registration.h b/runtime/include/brt/backends/cuda/providers/default/custom/op_registration.h
similarity index 94%
rename from runtime/include/brt/backends/cuda/providers/default/flash_attn/op_registration.h
rename to runtime/include/brt/backends/cuda/providers/default/custom/op_registration.h
index 3ce9abfc1..a12fc0923 100644
--- a/runtime/include/brt/backends/cuda/providers/default/flash_attn/op_registration.h
+++ b/runtime/include/brt/backends/cuda/providers/default/custom/op_registration.h
@@ -21,7 +21,7 @@ namespace brt {
 class KernelRegistry;
 namespace cuda {
-void RegisterFlashAttentionOps(KernelRegistry *registry);
+void RegisterCustomOps(KernelRegistry *registry);
 } // namespace cuda
 } // namespace brt
diff --git a/runtime/include/brt/core/framework/op_accessor.h b/runtime/include/brt/core/framework/op_accessor.h
index 352a63228..39f404dde 100644
--- a/runtime/include/brt/core/framework/op_accessor.h
+++ b/runtime/include/brt/core/framework/op_accessor.h
@@ -70,6 +70,8 @@ class OpAccessor {
   template <typename T>
   std::vector<T> GetAttrAsVector(const std::string &name) const;
 
+  void *GetAttrAsVoidPtr(const std::string &name) const;
+
   std::string GetUID() const;
 
   static int64_t GetNumElementsOfShape(const Shape &shape);
diff --git a/runtime/lib/backends/cuda/providers/default/cuda_provider.cc b/runtime/lib/backends/cuda/providers/default/cuda_provider.cc
index 56791e8fd..b362f469b 100644
--- a/runtime/lib/backends/cuda/providers/default/cuda_provider.cc
+++ b/runtime/lib/backends/cuda/providers/default/cuda_provider.cc
@@ -23,6 +23,7 @@
 #include "brt/backends/cuda/providers/default/ait/op_registration.h"
 #include "brt/backends/cuda/providers/default/codegen/op_registration.h"
 #include "brt/backends/cuda/providers/default/copy/op_registration.h"
+#include "brt/backends/cuda/providers/default/custom/op_registration.h"
 #include "brt/backends/cuda/providers/default/indexing/op_registration.h"
 #include "brt/backends/cuda/providers/default/math/op_registration.h"
 #include "brt/backends/cuda/providers/default/normalization/op_registration.h"
@@ -32,10 +33,6 @@
 #include "brt/core/session/session.h"
 #include <memory>
 
-#if BRT_ENABLE_FLASH_ATTENTION
-#include "brt/backends/cuda/providers/default/flash_attn/op_registration.h"
-#endif
-
 using namespace brt;
 using namespace brt::common;
 
@@ -51,9 +48,7 @@ BRT_STATIC_KERNEL_REGISTRATION(
       cuda::RegisterAITOps(registry);
       cuda::RegisterCodegenOps(registry);
       cuda::RegisterCopyOps(registry);
-#if BRT_ENABLE_FLASH_ATTENTION
-      cuda::RegisterFlashAttentionOps(registry);
-#endif
+      cuda::RegisterCustomOps(registry);
       cuda::RegisterIndexingOps(registry);
       cuda::RegisterMathOps(registry);
       cuda::RegisterNormalizationOps(registry);
diff --git a/runtime/lib/backends/cuda/providers/default/custom/custom.cc b/runtime/lib/backends/cuda/providers/default/custom/custom.cc
new file mode 100644
index 000000000..01a49755f
--- /dev/null
+++ b/runtime/lib/backends/cuda/providers/default/custom/custom.cc
@@ -0,0 +1,87 @@
+//===- custom.cc ----------------------------------------------*--- C++ -*-===//
+//
+// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "./custom.h"
+#include "brt/backends/cuda/device/cuda_work_queue.h"
+#include "brt/core/framework/op_accessor.h"
+#include "brt/core/ir/util.h"
+#include "byteir/Dialect/Byre/ByreDialect.h"
+#include "mlir/IR/BuiltinOps.h" // ModuleOp
+#include <dlfcn.h>
+#include <filesystem>
+#include <iostream>
+#include <string>
+
+using namespace brt;
+using namespace brt::common;
+using namespace brt::cuda;
+using namespace brt::ir;
+using namespace llvm;
+using namespace mlir;
+
+namespace brt {
+namespace cuda {
+
+CustomOpKernel::CustomOpKernel(const OpKernelInfo &info) : OpKernel(info) {
+  OpAccessor accessor(info_);
+  std::string lib_path = accessor.GetAttrAsString("lib_path");
+  std::string api_name = accessor.GetAttrAsString("api_name");
+  custom_lib_hdl = dlopen(lib_path.c_str(), RTLD_LAZY | RTLD_GLOBAL);
+  // std::cout << "Current path is " << std::filesystem::current_path() << '\n';
+  // std::cout << "API name is " << api_name << '\n';
+  std::string msg = std::string("Custom lib ") + lib_path + " load failed";
+  BRT_ENFORCE(custom_lib_hdl != nullptr, msg);
+  run_func_ = reinterpret_cast<CustomLibApiRun>(
+      dlsym(custom_lib_hdl, api_name.c_str()));
+  std::string api_msg = std::string("Couldn't find function: ") + api_name;
+  BRT_ENFORCE(run_func_ != NULL, api_msg);
+}
+
+int64_t getIntFromVoidPtr(void *data, size_t &pos) {
+  int64_t *intPtr =
+      reinterpret_cast<int64_t *>(static_cast<char *>(data) + pos);
+  pos += sizeof(int64_t);
+  return *intPtr;
+}
+
+float getFloatFromVoidPtr(void *data, size_t &pos) {
+  float *floatPtr = reinterpret_cast<float *>(static_cast<char *>(data) + pos);
+  pos += sizeof(float);
+  return *floatPtr;
+}
+
+common::Status CustomOpKernel::RunImpl(const ExecutionContext &ctx) {
+  OpAccessor accessor(info_, ctx.exec_frame);
+  void **tensor_args = new void *[accessor.GetNumArgs()];
+  for (size_t i = 0; i < accessor.GetNumArgs(); ++i) {
+    tensor_args[i] = accessor.GetArgAsyncValueRef(i);
+  }
+
+  // TODO: what about string??
+  void *extra_args = accessor.GetAttrAsVoidPtr("extra_args");
+  cudaStream_t stream =
+      static_cast<CUDAWorkQueue *>(ctx.work_queue)->GetComputeStream();
+
+  run_func_(tensor_args, extra_args, stream);
+  // need to free extra_args since there is a malloc behind GetAttrAsVoidPtr
+  free(extra_args);
+  delete[] tensor_args;
+  return common::Status::OK();
+}
+
+} // namespace cuda
+} // namespace brt
diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.h b/runtime/lib/backends/cuda/providers/default/custom/custom.h
similarity index 71%
rename from runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.h
rename to runtime/lib/backends/cuda/providers/default/custom/custom.h
index 2d8c40272..c4ced2479 100644
--- a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.h
+++ b/runtime/lib/backends/cuda/providers/default/custom/custom.h
@@ -1,5 +1,4 @@
-//===- flash_attn_bwd.h -------------------------------------------------*---
-// C++ -*-===//
+//===- custom.h -----------------------------------------------*--- C++ -*-===//
 //
 // Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved.
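CustomOpKernel above fixes the ABI a custom library has to export: a C symbol taking the gathered tensor pointers, the packed extra_args blob, and the compute stream. A minimal sketch of a conforming library; the symbol name, the tensor roles, and the single int64 attribute are all hypothetical:

    // my_custom_lib.cc -- built, e.g., with:
    //   nvcc -shared -Xcompiler -fPIC my_custom_lib.cc -o libmycustom.so
    #include <cstddef>
    #include <cstdint>
    #include <cuda_runtime.h>

    extern "C" void my_custom_op(void **tensors, void *extra_args, cudaStream_t stream) {
      // tensors[i] mirror GetArgAsyncValueRef(i): device pointers in op order.
      float *in = static_cast<float *>(tensors[0]);
      float *out = static_cast<float *>(tensors[1]);
      // Unpack extra_args the same way getIntFromVoidPtr does on the runtime side.
      size_t pos = 0;
      int64_t num_elems =
          *reinterpret_cast<int64_t *>(static_cast<char *>(extra_args) + pos);
      pos += sizeof(int64_t);
      // Enqueue work on the provided stream; a device-to-device copy stands in
      // for a real kernel launch here.
      cudaMemcpyAsync(out, in, num_elems * sizeof(float),
                      cudaMemcpyDeviceToDevice, stream);
    }

The runtime resolves this symbol with dlsym using the api_name attribute, so the exported name must match exactly, and extern "C" is required to keep it unmangled.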
// Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,16 +17,22 @@ #pragma once -#include "brt/core/framework/dtype.h" #include "brt/core/framework/op_kernel.h" +#include namespace brt { namespace cuda { -class FlashAttnBwdOpKernel final : public OpKernel { + +class CustomOpKernel final : public OpKernel { public: - explicit FlashAttnBwdOpKernel(const OpKernelInfo &info); + typedef void (*CustomLibApiRun)(void **, void *, cudaStream_t); + explicit CustomOpKernel(const OpKernelInfo &info); common::Status RunImpl(const ExecutionContext &) override; + +private: + void *custom_lib_hdl; + CustomLibApiRun run_func_ = nullptr; }; } // namespace cuda -} // namespace brt \ No newline at end of file +} // namespace brt diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/op_registration.cc b/runtime/lib/backends/cuda/providers/default/custom/op_registration.cc similarity index 67% rename from runtime/lib/backends/cuda/providers/default/flash_attn/op_registration.cc rename to runtime/lib/backends/cuda/providers/default/custom/op_registration.cc index 01fabd43f..9015da55a 100644 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/op_registration.cc +++ b/runtime/lib/backends/cuda/providers/default/custom/op_registration.cc @@ -15,25 +15,21 @@ // //===----------------------------------------------------------------------===// -#include "brt/backends/cuda/providers/default/flash_attn/op_registration.h" -#include "./flash_attn_bwd.h" -#include "./flash_attn_fwd.h" +#include "brt/backends/cuda/providers/default/custom/op_registration.h" + +#include "./custom.h" #include "brt/core/framework/kernel_registry.h" namespace brt { namespace cuda { -void RegisterFlashAttentionOps(KernelRegistry *registry) { +void RegisterCustomOps(KernelRegistry *registry) { registry->Register( - "byteir.flash_attn_fwd", + "custom", [](const brt::OpKernelInfo &info) -> std::shared_ptr { - return std::make_shared(info); - }); - registry->Register( - "byteir.flash_attn_bwd", - [](const brt::OpKernelInfo &info) -> std::shared_ptr { - return std::make_shared(info); + return std::make_shared(info); }); } + } // namespace cuda } // namespace brt diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.cc b/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.cc deleted file mode 100644 index 4f9da6ae5..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_bwd.cc +++ /dev/null @@ -1,299 +0,0 @@ -//===- flash_attn_bwd.cc -----------------------------------*---C++ -*-===// -// -// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -//===----------------------------------------------------------------------===// - -#include "./flash_attn_bwd.h" -#include "./kernels/flash_api.h" -#include "brt/backends/cuda/device/common/util.h" -#include "brt/backends/cuda/device/cuda_allocator.h" -#include "brt/backends/cuda/device/cuda_work_queue.h" -#include "brt/core/common/common.h" -#include "brt/core/framework/op_accessor.h" -#include -#include -#include - -#define InvalidArgs(msg) \ - common::Status(common::StatusCategory::BRT, \ - common::StatusCode::INVALID_ARGUMENT, msg); - -#define ShapeCheck(shape, batch_size, seqlen, num_heads, head_size_og) \ - if (shape[0] != batch_size || shape[1] != seqlen || shape[2] != num_heads || \ - shape[3] != head_size_og) { \ - return InvalidArgs("flash attn shape check failed"); \ - } - -namespace brt { -namespace cuda { -FlashAttnBwdOpKernel::FlashAttnBwdOpKernel(const OpKernelInfo &info) - : OpKernel(info, false, false, false, false) {} - -// byre.compute @byteir.flash_attn_bwd(dout, q, k, v, out, softmax_lse, -// rng_state, dq, dk, dv, dsoftmax_sum_ptr) {causal, -// dropout_p,softmax_scale, dq_accum_ptr} -common::Status FlashAttnBwdOpKernel::RunImpl(const ExecutionContext &ctx) { - OpAccessor accessor(info_, ctx.exec_frame); - // args - void *dout_ptr = accessor.GetArgAsyncValueRef(0); - void *q_ptr = accessor.GetArgAsyncValueRef(1); - void *k_ptr = accessor.GetArgAsyncValueRef(2); - void *v_ptr = accessor.GetArgAsyncValueRef(3); - void *out_ptr = accessor.GetArgAsyncValueRef(4); - void *softmax_lse_ptr = accessor.GetArgAsyncValueRef(5); - void *rng_state_ptr = accessor.GetArgAsyncValueRef(6); // TODO : handle rng - void *dq_ptr = accessor.GetArgAsyncValueRef(7); - void *dk_ptr = accessor.GetArgAsyncValueRef(8); - void *dv_ptr = accessor.GetArgAsyncValueRef(9); - void *dsoftmax_ptr = accessor.GetArgAsyncValueRef(10); - void *dq_accum_ptr = accessor.GetArgAsyncValueRef(11); - - // attr - const bool is_causal = accessor.GetAttrAsBool("causal"); - const float p_dropout = accessor.GetAttrAsFloat("dropout_p"); - const float softmax_scale = accessor.GetAttrAsFloat("softmax_scale"); - - // device compute capability check - int nDevices; - - cudaGetDeviceCount(&nDevices); - bool support_sm8x = false; - bool support_sm80 = false; - bool support_sm90 = false; - for (int i = 0; i < nDevices; i++) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, i); - support_sm8x = support_sm8x || (prop.major == 8 && prop.minor >= 0); - support_sm80 = support_sm80 || (prop.major == 8 && prop.minor == 0); - support_sm90 = support_sm90 || (prop.major == 9 && prop.minor == 0); - } - - if (!support_sm8x && !support_sm90) { - return InvalidArgs("FlashAttention only supports Ampere GPUs or newer."); - } - - // dropout check - // bool is_dropout = p_dropout > 0.0; - // if (is_dropout) { - // return InvalidArgs("currently, we only support p_dropout == 0"); - // } - - // type check - const auto dout_type = accessor.GetArgDTypeEnum(0); - const auto q_type = accessor.GetArgDTypeEnum(1); - const auto k_type = accessor.GetArgDTypeEnum(2); - const auto v_type = accessor.GetArgDTypeEnum(3); - const auto out_type = accessor.GetArgDTypeEnum(4); - const auto dq_type = accessor.GetArgDTypeEnum(7); - const auto dk_type = accessor.GetArgDTypeEnum(8); - const auto dv_type = accessor.GetArgDTypeEnum(9); - - // if (q_type != DTypeEnum::Float16 || q_type != DTypeEnum::BFloat16) { - // return InvalidArgs("FlashAttention only support fp16 and bf16 data - // type"); - // } - // if (dout_type != q_type || k_type != q_type || 
v_type != q_type || - // out_type != q_type || dq_type != q_type || dk_type != q_type || - // dv_type != q_type) { - // return InvalidArgs("Args must have the same dtype"); - // } - - // shepe check - const auto dout_shape = accessor.GetArgShape(0); - const auto q_shape = accessor.GetArgShape(1); - const auto k_shape = accessor.GetArgShape(2); - const auto v_shape = accessor.GetArgShape(3); - const auto out_shape = accessor.GetArgShape(4); - const auto dq_shape = accessor.GetArgShape(7); - const auto dk_shape = accessor.GetArgShape(8); - const auto dv_shape = accessor.GetArgShape(9); - const auto dsoftmax_shape = accessor.GetArgShape(10); - const auto dq_accum_shape = accessor.GetArgShape(11); - int64_t o_rank = out_shape.size(); - int64_t q_rank = q_shape.size(); - int64_t k_rank = k_shape.size(); - int64_t v_rank = v_shape.size(); - if (o_rank != 4 || q_rank != 4 || k_rank != 4 || v_rank != 4) { - return InvalidArgs("flash-attn expects input tensors of rank 4."); - } - - const int batch_size_o = out_shape[0]; - const int seqlen_o = out_shape[1]; - const int num_heads_o = out_shape[2]; - const int head_size_og_o = out_shape[3]; - const int batch_size_q = q_shape[0]; - const int seqlen_q = q_shape[1]; - const int num_heads_q = q_shape[2]; - const int head_size_og_q = q_shape[3]; - const int batch_size_k = k_shape[0]; - const int seqlen_k = k_shape[1]; - const int num_heads_k = k_shape[2]; - const int head_size_og_k = k_shape[3]; - const int batch_size_v = v_shape[0]; - const int seqlen_v = v_shape[1]; - const int num_heads_v = v_shape[2]; - const int head_size_og_v = v_shape[3]; - if (batch_size_q <= 0) { - return InvalidArgs("batch size must be postive"); - } - if (head_size_og_q > 256) { - return InvalidArgs( - "FlashAttention backword only supports head dimension at most 256"); - } - if (head_size_og_q > 192 && !support_sm80 && !support_sm90) { - return InvalidArgs("FlashAttention backward for head dim > 192 requires " - "A100/A800 or H100/H800"); - } - if (num_heads_q % num_heads_k != 0) { - return InvalidArgs( - "Number of heads in key/value must divide number of heads in query"); - } - ShapeCheck(out_shape, batch_size_q, seqlen_q, num_heads_q, head_size_og_q); - ShapeCheck(dq_shape, batch_size_q, seqlen_q, num_heads_q, head_size_og_q); - ShapeCheck(dout_shape, batch_size_q, seqlen_q, num_heads_q, head_size_og_q); - ShapeCheck(k_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - ShapeCheck(v_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - ShapeCheck(dk_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - ShapeCheck(dv_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - - auto round_multiple = [](int x, int m) { return (x + m - 1) / m * m; }; - const int head_size = round_multiple(head_size_og_q, 8); - const int head_size_rounded = round_multiple(head_size, 32); - const int seqlen_q_rounded = round_multiple(seqlen_q, 128); - const int seqlen_k_rounded = round_multiple(seqlen_k, 128); - - if (dsoftmax_shape[0] != batch_size_q || dsoftmax_shape[1] != num_heads_q || - dsoftmax_shape[2] != seqlen_q_rounded) { - return InvalidArgs("dsoftmax shape check failed."); - } - ShapeCheck(dq_accum_shape, batch_size_q, num_heads_q, seqlen_q_rounded, - head_size_rounded); - - if (head_size_og_q % 8 != 0) { - // TODO: Handle head sizes that are not a multiple of 8 via some padding. 
- return InvalidArgs("only supports head sizes that are a multiple of 8"); - } - - if (num_heads_k != num_heads_q) { - // TODO: add compiler support when num_heads_k != num_heads_q - // we need to create dk_expanded and dv_expanded when num_heads_k != - // num_heads_q reference in flash attn v2 as follows: - // ====================================== - // at::Tensor dk_expanded, dv_expanded; - // if (num_heads_k != num_heads) { // MQA / GQA - // dk_expanded = torch::empty({batch_size, seqlen_k, num_heads, - // head_size}, opts); dv_expanded = torch::empty({batch_size, seqlen_k, - // num_heads, head_size}, opts); - // } else { - // dk_expanded = dk; - // dv_expanded = dv; - // } - // ====================================== - return InvalidArgs("currently, we only support num_heads_k == num_heads_q"); - } - - // dtype check - DTypeEnum o_dtype = accessor.GetArgDTypeEnum(0); - DTypeEnum q_dtype = accessor.GetArgDTypeEnum(1); - DTypeEnum k_dtype = accessor.GetArgDTypeEnum(2); - DTypeEnum v_dtype = accessor.GetArgDTypeEnum(3); - if (o_dtype != q_dtype || q_dtype != k_dtype || k_dtype != v_dtype) { - return InvalidArgs( - "query, key, value, and output must have the same dtype"); - } - - // bool loop = seqlen_k > blocksize_c; - // TODO: change later, for now set to true for simplicity - bool loop = true; - - cudaStream_t stream = - static_cast(ctx.work_queue)->GetComputeStream(); - - uint32_t q_batch_stride = q_shape[1] * q_shape[2] * q_shape[3]; - uint32_t k_batch_stride = k_shape[1] * k_shape[2] * k_shape[3]; - uint32_t v_batch_stride = v_shape[1] * v_shape[2] * v_shape[3]; - uint32_t o_batch_stride = out_shape[1] * out_shape[2] * out_shape[3]; - uint32_t q_row_stride = q_shape[2] * q_shape[3]; - uint32_t k_row_stride = k_shape[2] * k_shape[3]; - uint32_t v_row_stride = v_shape[2] * v_shape[3]; - uint32_t o_row_stride = out_shape[2] * out_shape[3]; - uint32_t q_head_stride = q_shape[3]; - uint32_t k_head_stride = k_shape[3]; - uint32_t v_head_stride = v_shape[3]; - uint32_t o_head_stride = out_shape[3]; - - // std::cout << "params:" << std::endl; - // std::cout << "q_batch_stride: " << q_batch_stride << std::endl; - // std::cout << "k_batch_stride: " << k_batch_stride << std::endl; - // std::cout << "v_batch_stride: " << v_batch_stride << std::endl; - // std::cout << "o_batch_stride: " << o_batch_stride << std::endl; - // std::cout << "q_row_stride: " << q_row_stride << std::endl; - // std::cout << "k_row_stride: " << k_row_stride << std::endl; - // std::cout << "v_row_stride: " << v_row_stride << std::endl; - // std::cout << "o_row_stride: " << o_row_stride << std::endl; - // std::cout << "q_head_stride: " << q_head_stride << std::endl; - // std::cout << "k_head_stride: " << k_head_stride << std::endl; - // std::cout << "v_head_stride: " << v_head_stride << std::endl; - // std::cout << "o_head_stride: " << o_head_stride << std::endl; - // std::cout << "batch_size_q: " << batch_size_q << std::endl; - // std::cout << "num_heads_q: " << num_heads_q << std::endl; - // std::cout << "num_heads_k: " << num_heads_k << std::endl; - // std::cout << "head_size_og_q: " << head_size_og_q << std::endl; - // std::cout << "head_size_rounded: " << head_size_rounded << std::endl; - // std::cout << "softmax_scale: " << softmax_scale << std::endl; - // std::cout << "seqlen_q: " << seqlen_q << std::endl; - // std::cout << "seqlen_k: " << seqlen_k << std::endl; - // std::cout << "seqlen_q_rounded: " << seqlen_q_rounded << std::endl; - // std::cout << "seqlen_k_rounded: " << seqlen_k_rounded << std::endl; - // 
std::cout << "is_causal: " << is_causal << std::endl; - - kernel::run_mha_bwd( - q_ptr, k_ptr, v_ptr, out_ptr, dout_ptr, dq_ptr, dk_ptr, dv_ptr, - /* cu_seqlens_q_ptr */ nullptr, - /* cu_seqlens_k_ptr */ nullptr, - /* dq_accum_ptr */ loop ? dq_accum_ptr : nullptr, - /* dk_accum_ptr */ nullptr, - /* dv_accum_ptr */ nullptr, softmax_lse_ptr, dsoftmax_ptr, rng_state_ptr, - /* q_batch_stride */ q_batch_stride, - /* k_batch_stride */ k_batch_stride, - /* v_batch_stride */ v_batch_stride, - /* o_batch_stride */ o_batch_stride, - /* q_row_stride */ q_row_stride, - /* k_row_stride */ k_row_stride, - /* v_row_stride */ v_row_stride, - /* o_row_stride */ o_row_stride, - /* q_head_stride */ q_head_stride, - /* k_head_stride */ k_head_stride, - /* v_head_stride */ v_head_stride, - /* o_head_stride */ o_head_stride, - /* b */ batch_size_q, - /* h */ num_heads_q, - /* h_k */ num_heads_k, - /* d */ head_size_og_q, - /* d_rounded */ head_size_rounded, - /* softmax_scale*/ softmax_scale, - /* seqlen_q */ seqlen_q, - /* seqlen_k */ seqlen_k, - /* seqlen_q_rounded */ seqlen_q_rounded, - /* seqlen_k_rounded */ seqlen_k_rounded, - /* p_dropout */ p_dropout, - /* is_causal */ is_causal, - /* stream */ stream); - - return common::Status::OK(); -} - -} // namespace cuda -} // namespace brt diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.cc b/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.cc deleted file mode 100644 index 5a588ac82..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.cc +++ /dev/null @@ -1,211 +0,0 @@ -//===- flash_attn_fwd.cc -----------------------------------*---C++ -*-===// -// -// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -//===----------------------------------------------------------------------===// - -#include "./flash_attn_fwd.h" -#include "./kernels/flash_api.h" -#include "brt/backends/cuda/device/common/util.h" -#include "brt/backends/cuda/device/cuda_allocator.h" -#include "brt/backends/cuda/device/cuda_work_queue.h" -#include "brt/core/common/common.h" -#include "brt/core/framework/op_accessor.h" -#include -#include - -#define InvalidArgs(msg) \ - common::Status(common::StatusCategory::BRT, \ - common::StatusCode::INVALID_ARGUMENT, msg); - -#define ShapeCheck(shape, batch_size, seqlen, num_heads, head_size_og) \ - if (shape[0] != batch_size || shape[1] != seqlen || shape[2] != num_heads || \ - shape[3] != head_size_og) { \ - return InvalidArgs("flash attn shape check failed"); \ - } - -namespace brt { -namespace cuda { -FlashAttnFwdOpKernel::FlashAttnFwdOpKernel(const OpKernelInfo &info) - : OpKernel(info, false, false, false, false) {} - -// byre.compute @byteir.flash_attn_fwd(q_padded, k_padded, v_padded, out_padded, -// softmax_lse, softmax_ptr, rng_state) {causal, dropout_p, softmax_scale, -// return_softmax} output: out, q_padded, k_padded, v_padded, out_padded, -// softmax_lse, softmax_ptr, rng_state(2xi64) -common::Status FlashAttnFwdOpKernel::RunImpl(const ExecutionContext &ctx) { - OpAccessor accessor(info_, ctx.exec_frame); - // args - void *q_ptr = accessor.GetArgAsyncValueRef(0); - void *k_ptr = accessor.GetArgAsyncValueRef(1); - void *v_ptr = accessor.GetArgAsyncValueRef(2); - void *rng_state_ptr = accessor.GetArgAsyncValueRef(3); - void *o_ptr = accessor.GetArgAsyncValueRef(4); - void *softmax_lse_ptr = accessor.GetArgAsyncValueRef(5); - void *softmax_ptr = accessor.GetArgAsyncValueRef(6); - - // check rng_state - // uint64_t *h_rng_state = new uint64_t[2]; - // cudaMemcpy(h_rng_state, rng_state_ptr, 2 * sizeof(uint64_t), - // cudaMemcpyDeviceToHost); std::cout << h_rng_state[0] << "," << - // h_rng_state[1] << std::endl; cudaDeviceSynchronize(); - - // attr - const bool is_causal = accessor.GetAttrAsBool("causal"); - const float p_dropout = accessor.GetAttrAsFloat("dropout_p"); - const float softmax_scale = accessor.GetAttrAsFloat("softmax_scale"); - const bool return_softmax = accessor.GetAttrAsBool("return_softmax"); - - softmax_ptr = return_softmax ? 
softmax_ptr : nullptr; - - const auto q_shape = accessor.GetArgShape(0); - const auto k_shape = accessor.GetArgShape(1); - const auto v_shape = accessor.GetArgShape(2); - const auto o_shape = accessor.GetArgShape(4); - int64_t o_rank = o_shape.size(); - int64_t q_rank = q_shape.size(); - int64_t k_rank = k_shape.size(); - int64_t v_rank = v_shape.size(); - if (o_rank != 4 || q_rank != 4 || k_rank != 4 || v_rank != 4) { - return InvalidArgs("flash-attn expects input tensors of rank 4."); - } - - // shape check - const int batch_size_o = o_shape[0]; - const int seqlen_o = o_shape[1]; - const int num_heads_o = o_shape[2]; - const int head_size_og_o = o_shape[3]; - const int batch_size_q = q_shape[0]; - const int seqlen_q = q_shape[1]; - const int num_heads_q = q_shape[2]; - const int head_size_og_q = q_shape[3]; - const int batch_size_k = k_shape[0]; - const int seqlen_k = k_shape[1]; - const int num_heads_k = k_shape[2]; - const int head_size_og_k = k_shape[3]; - const int batch_size_v = v_shape[0]; - const int seqlen_v = v_shape[1]; - const int num_heads_v = v_shape[2]; - const int head_size_og_v = v_shape[3]; - if (batch_size_q <= 0) { - return InvalidArgs("batch size must be postive"); - } - if (head_size_og_q > 256) { - return InvalidArgs( - "FlashAttention forward only supports head dimension at most 256"); - } - if (num_heads_q % num_heads_k != 0) { - return InvalidArgs( - "Number of heads in key/value must divide number of heads in query"); - } - ShapeCheck(o_shape, batch_size_q, seqlen_q, num_heads_q, head_size_og_q); - ShapeCheck(k_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - ShapeCheck(v_shape, batch_size_q, seqlen_k, num_heads_k, head_size_og_q); - if (head_size_og_q % 8 != 0) { - // TODO: Handle head sizes that are not a multiple of 8 via some padding. 
- return InvalidArgs("only supports head sizes that are a multiple of 8"); - } - - // dtype check - DTypeEnum q_dtype = accessor.GetArgDTypeEnum(0); - DTypeEnum k_dtype = accessor.GetArgDTypeEnum(1); - DTypeEnum v_dtype = accessor.GetArgDTypeEnum(2); - DTypeEnum o_dtype = accessor.GetArgDTypeEnum(4); - if (o_dtype != q_dtype || q_dtype != k_dtype || k_dtype != v_dtype) { - return InvalidArgs( - "query, key, value, and output must have the same dtype"); - } - - auto round_multiple = [](int x, int m) { return (x + m - 1) / m * m; }; - const int head_size = round_multiple(head_size_og_q, 8); - const int head_size_rounded = round_multiple(head_size, 32); - const int seqlen_q_rounded = round_multiple(seqlen_q, 128); - const int seqlen_k_rounded = round_multiple(seqlen_k, 128); - - cudaStream_t stream = - static_cast(ctx.work_queue)->GetComputeStream(); - - uint32_t q_batch_stride = q_shape[1] * q_shape[2] * q_shape[3]; - uint32_t k_batch_stride = k_shape[1] * k_shape[2] * k_shape[3]; - uint32_t v_batch_stride = v_shape[1] * v_shape[2] * v_shape[3]; - uint32_t o_batch_stride = o_shape[1] * o_shape[2] * o_shape[3]; - uint32_t q_row_stride = q_shape[2] * q_shape[3]; - uint32_t k_row_stride = k_shape[2] * k_shape[3]; - uint32_t v_row_stride = v_shape[2] * v_shape[3]; - uint32_t o_row_stride = o_shape[2] * o_shape[3]; - uint32_t q_head_stride = q_shape[3]; - uint32_t k_head_stride = k_shape[3]; - uint32_t v_head_stride = v_shape[3]; - uint32_t o_head_stride = o_shape[3]; - - // std::cout << "params:" << std::endl; - // std::cout << "q_batch_stride: " << q_batch_stride << std::endl; - // std::cout << "k_batch_stride: " << k_batch_stride << std::endl; - // std::cout << "v_batch_stride: " << v_batch_stride << std::endl; - // std::cout << "o_batch_stride: " << o_batch_stride << std::endl; - // std::cout << "q_row_stride: " << q_row_stride << std::endl; - // std::cout << "k_row_stride: " << k_row_stride << std::endl; - // std::cout << "v_row_stride: " << v_row_stride << std::endl; - // std::cout << "o_row_stride: " << o_row_stride << std::endl; - // std::cout << "q_head_stride: " << q_head_stride << std::endl; - // std::cout << "k_head_stride: " << k_head_stride << std::endl; - // std::cout << "v_head_stride: " << v_head_stride << std::endl; - // std::cout << "o_head_stride: " << o_head_stride << std::endl; - // std::cout << "batch_size_q: " << batch_size_q << std::endl; - // std::cout << "num_heads_q: " << num_heads_q << std::endl; - // std::cout << "num_heads_k: " << num_heads_k << std::endl; - // std::cout << "head_size: " << head_size << std::endl; - // std::cout << "head_size_rounded: " << head_size_rounded << std::endl; - // std::cout << "softmax_scale: " << softmax_scale << std::endl; - // std::cout << "seqlen_q: " << seqlen_q << std::endl; - // std::cout << "seqlen_k: " << seqlen_k << std::endl; - // std::cout << "seqlen_q_rounded: " << seqlen_q_rounded << std::endl; - // std::cout << "seqlen_k_rounded: " << seqlen_k_rounded << std::endl; - // std::cout << "is_causal: " << is_causal << std::endl; - - kernel::run_mha(q_ptr, k_ptr, v_ptr, o_ptr, softmax_lse_ptr, softmax_ptr, - rng_state_ptr, - /* cu_seqlens_q_ptr */ nullptr, - /* cu_seqlens_k_ptr */ nullptr, - /* q_batch_stride */ q_batch_stride, - /* k_batch_stride */ k_batch_stride, - /* v_batch_stride */ v_batch_stride, - /* o_batch_stride */ o_batch_stride, - /* q_row_stride */ q_row_stride, - /* k_row_stride */ k_row_stride, - /* v_row_stride */ v_row_stride, - /* o_row_stride */ o_row_stride, - /* q_head_stride */ q_head_stride, - /* 
k_head_stride */ k_head_stride, - /* v_head_stride */ v_head_stride, - /* o_head_stride */ o_head_stride, - /* b */ batch_size_q, - /* h */ num_heads_q, - /* h_k */ num_heads_k, - /* d */ head_size, - /* d_rounded */ head_size_rounded, - /* softmax_scale*/ softmax_scale, - /* seqlen_q */ seqlen_q, - /* seqlen_k */ seqlen_k, - /* seqlen_q_rounded */ seqlen_q_rounded, - /* seqlen_k_rounded */ seqlen_k_rounded, - /* p_dropout */ p_dropout, - /* is_causal */ is_causal, - /* stream */ stream); - - return common::Status::OK(); -} - -} // namespace cuda -} // namespace brt diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.h b/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.h deleted file mode 100644 index 2e82f9a3c..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/flash_attn_fwd.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- flash_attn_fwd.h -------------------------------------------------*--- -// C++ -*-===// -// -// Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "brt/core/framework/dtype.h" -#include "brt/core/framework/op_kernel.h" - -namespace brt { -namespace cuda { -class FlashAttnFwdOpKernel final : public OpKernel { -public: - explicit FlashAttnFwdOpKernel(const OpKernelInfo &info); - common::Status RunImpl(const ExecutionContext &) override; -}; - -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/block_info.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/block_info.h deleted file mode 100644 index 7fc1af7a4..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/block_info.h +++ /dev/null @@ -1,56 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. - ******************************************************************************/ - -#pragma once - -namespace brt { -namespace cuda { -namespace kernel { -namespace flash { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template struct BlockInfo { - - template - __device__ BlockInfo(const Params ¶ms, const int bidb) - : sum_s_q(!Varlen || params.cu_seqlens_q == nullptr - ? -1 - : params.cu_seqlens_q[bidb]), - sum_s_k(!Varlen || params.cu_seqlens_k == nullptr - ? -1 - : params.cu_seqlens_k[bidb]), - actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr - ? params.seqlen_q - : params.cu_seqlens_q[bidb + 1] - sum_s_q), - actual_seqlen_k(!Varlen || params.cu_seqlens_k == nullptr - ? params.seqlen_k - : params.cu_seqlens_k[bidb + 1] - sum_s_k) {} - - template - inline __device__ index_t q_offset(const index_t batch_stride, - const index_t row_stride, - const int bidb) const { - return sum_s_q == -1 ? 
bidb * batch_stride : uint32_t(sum_s_q) * row_stride; - } - - template - inline __device__ index_t k_offset(const index_t batch_stride, - const index_t row_stride, - const int bidb) const { - return sum_s_k == -1 ? bidb * batch_stride : uint32_t(sum_s_k) * row_stride; - } - - const int sum_s_q; - const int sum_s_k; - const uint32_t actual_seqlen_q; - const uint32_t actual_seqlen_k; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -} // namespace flash -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.cu deleted file mode 100644 index dce323437..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.cu +++ /dev/null @@ -1,335 +0,0 @@ -#include "flash.h" -#include "flash_fwd_launch_template.h" -#include - -namespace brt { -namespace cuda { -namespace kernel { - -// TODO: Switch back to handling bf16. -// void run_mha_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { -// FWD_HEADDIM_SWITCH(params.d, [&] { -// run_mha_fwd_(params, stream); -// }); -// } - -// void run_mha_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { -// FP16_SWITCH(!params.is_bf16, [&] { -// FWD_HEADDIM_SWITCH(params.d, [&] { -// run_mha_fwd_(params, stream); -// }); -// }); -// } - -// for debug -void print_Qkv_params(Qkv_params ¶ms) { - std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl; - std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl; - std::cout << "v_batch_stride: " << params.v_batch_stride << std::endl; - std::cout << "q_row_stride: " << params.q_row_stride << std::endl; - std::cout << "k_row_stride: " << params.k_row_stride << std::endl; - std::cout << "v_row_stride: " << params.v_row_stride << std::endl; - std::cout << "q_head_stride: " << params.q_head_stride << std::endl; - std::cout << "k_head_stride: " << params.k_head_stride << std::endl; - std::cout << "v_head_stride: " << params.v_head_stride << std::endl; - std::cout << "h: " << params.h << std::endl; - std::cout << "h_k: " << params.h_k << std::endl; - std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl; -} - -void print_Flash_fwd_params(Flash_fwd_params ¶ms) { - std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl; - std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl; - std::cout << "v_batch_stride: " << params.v_batch_stride << std::endl; - std::cout << "q_row_stride: " << params.q_row_stride << std::endl; - std::cout << "k_row_stride: " << params.k_row_stride << std::endl; - std::cout << "v_row_stride: " << params.v_row_stride << std::endl; - std::cout << "q_head_stride: " << params.q_head_stride << std::endl; - std::cout << "k_head_stride: " << params.k_head_stride << std::endl; - std::cout << "v_head_stride: " << params.v_head_stride << std::endl; - std::cout << "h: " << params.h << std::endl; - std::cout << "h_k: " << params.h_k << std::endl; - std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl; - - std::cout << "o_batch_stride: " << params.o_batch_stride << std::endl; - std::cout << "o_row_stride: " << params.o_row_stride << std::endl; - std::cout << "o_head_stride: " << params.o_head_stride << std::endl; - std::cout << "b: " << params.b << std::endl; - std::cout << "seqlen_q: " << params.seqlen_q << std::endl; - std::cout << "seqlen_k: " << 
params.seqlen_k << std::endl; - std::cout << "d: " << params.d << std::endl; - std::cout << "seqlen_q_rounded: " << params.seqlen_q_rounded << std::endl; - std::cout << "seqlen_k_rounded: " << params.seqlen_k_rounded << std::endl; - std::cout << "d_rounded: " << params.d_rounded << std::endl; - std::cout << "scale_softmax: " << params.scale_softmax << std::endl; - std::cout << "scale_softmax_log2: " << params.scale_softmax_log2 << std::endl; - std::cout << "p_dropout: " << params.p_dropout << std::endl; - std::cout << "p_dropout_in_uint8_t: " << params.p_dropout_in_uint8_t - << std::endl; - std::cout << "rp_dropout: " << params.rp_dropout << std::endl; - std::cout << "scale_softmax_rp_dropout: " << params.scale_softmax_rp_dropout - << std::endl; - std::cout << "is_bf16: " << params.is_bf16 << std::endl; - std::cout << "is_causal: " << params.is_causal << std::endl; -} - -void print_Flash_bwd_params(Flash_bwd_params ¶ms) { - std::cout << "q_batch_stride: " << params.q_batch_stride << std::endl; - std::cout << "k_batch_stride: " << params.k_batch_stride << std::endl; - std::cout << "v_batch_stride: " << params.v_batch_stride << std::endl; - std::cout << "q_row_stride: " << params.q_row_stride << std::endl; - std::cout << "k_row_stride: " << params.k_row_stride << std::endl; - std::cout << "v_row_stride: " << params.v_row_stride << std::endl; - std::cout << "q_head_stride: " << params.q_head_stride << std::endl; - std::cout << "k_head_stride: " << params.k_head_stride << std::endl; - std::cout << "v_head_stride: " << params.v_head_stride << std::endl; - std::cout << "h: " << params.h << std::endl; - std::cout << "h_k: " << params.h_k << std::endl; - std::cout << "h_h_k_ratio: " << params.h_h_k_ratio << std::endl; - - std::cout << "o_batch_stride: " << params.o_batch_stride << std::endl; - std::cout << "o_row_stride: " << params.o_row_stride << std::endl; - std::cout << "o_head_stride: " << params.o_head_stride << std::endl; - std::cout << "b: " << params.b << std::endl; - std::cout << "seqlen_q: " << params.seqlen_q << std::endl; - std::cout << "seqlen_k: " << params.seqlen_k << std::endl; - std::cout << "d: " << params.d << std::endl; - std::cout << "seqlen_q_rounded: " << params.seqlen_q_rounded << std::endl; - std::cout << "seqlen_k_rounded: " << params.seqlen_k_rounded << std::endl; - std::cout << "d_rounded: " << params.d_rounded << std::endl; - std::cout << "scale_softmax: " << params.scale_softmax << std::endl; - std::cout << "scale_softmax_log2: " << params.scale_softmax_log2 << std::endl; - std::cout << "p_dropout: " << params.p_dropout << std::endl; - std::cout << "p_dropout_in_uint8_t: " << params.p_dropout_in_uint8_t - << std::endl; - std::cout << "rp_dropout: " << params.rp_dropout << std::endl; - std::cout << "scale_softmax_rp_dropout: " << params.scale_softmax_rp_dropout - << std::endl; - std::cout << "is_bf16: " << params.is_bf16 << std::endl; - std::cout << "is_causal: " << params.is_causal << std::endl; - - std::cout << "do_batch_stride: " << params.do_batch_stride << std::endl; - std::cout << "do_row_stride: " << params.do_row_stride << std::endl; - std::cout << "do_head_stride: " << params.do_head_stride << std::endl; - std::cout << "dq_batch_stride: " << params.dq_batch_stride << std::endl; - std::cout << "dk_batch_stride: " << params.dk_batch_stride << std::endl; - std::cout << "dv_batch_stride: " << params.dv_batch_stride << std::endl; - std::cout << "dq_row_stride: " << params.dq_row_stride << std::endl; - std::cout << "dk_row_stride: " << params.dk_row_stride << 
std::endl; - std::cout << "dv_row_stride: " << params.dv_row_stride << std::endl; - std::cout << "dq_head_stride: " << params.dq_head_stride << std::endl; - std::cout << "dk_head_stride: " << params.dk_head_stride << std::endl; - std::cout << "dv_head_stride: " << params.dv_head_stride << std::endl; -} - -void run_mha(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr, - void *softmax_lse_ptr, void *softmax_ptr, void *rng_state_ptr, - - int32_t *cu_seqlens_q_ptr, int32_t *cu_seqlens_k_ptr, - - uint32_t q_batch_stride, uint32_t k_batch_stride, - uint32_t v_batch_stride, uint32_t o_batch_stride, - - uint32_t q_row_stride, uint32_t k_row_stride, - uint32_t v_row_stride, uint32_t o_row_stride, - - uint32_t q_head_stride, uint32_t k_head_stride, - uint32_t v_head_stride, uint32_t o_head_stride, - - uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, - uint32_t d_rounded, float softmax_scale, - - uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded, - uint32_t seqlen_k_rounded, - - float p_dropout, int is_causal, cudaStream_t stream) { - Flash_fwd_params params; - // Reset the parameters - memset(¶ms, 0, sizeof(params)); - - // Set the pointers and strides. - params.q_ptr = q_ptr; - params.k_ptr = k_ptr; - params.v_ptr = v_ptr; - params.o_ptr = o_ptr; - - params.softmax_lse_ptr = softmax_lse_ptr; - - // All stride are in elements, not bytes. - params.q_batch_stride = q_batch_stride; - params.k_batch_stride = k_batch_stride; - params.v_batch_stride = v_batch_stride; - params.o_batch_stride = o_batch_stride; - - params.q_row_stride = q_row_stride; - params.k_row_stride = k_row_stride; - params.v_row_stride = v_row_stride; - params.o_row_stride = o_row_stride; - params.q_head_stride = q_head_stride; - params.k_head_stride = k_head_stride; - params.v_head_stride = v_head_stride; - params.o_head_stride = o_head_stride; - - // Set the dimensions. - params.b = b; - params.h = h; - params.h_k = h_k; - params.h_h_k_ratio = h / h_k; - params.seqlen_q = seqlen_q; - params.seqlen_k = seqlen_k; - params.seqlen_q_rounded = seqlen_q_rounded; - params.seqlen_k_rounded = seqlen_k_rounded; - params.d = d; - params.d_rounded = d_rounded; - params.is_causal = is_causal; - - // Set the different scale values. - params.scale_softmax = softmax_scale; - params.scale_softmax_log2 = softmax_scale * M_LOG2E; - - params.p_dropout = 1.f - p_dropout; // probability to keep - params.p_dropout_in_uint8_t = uint8_t(std::floor(params.p_dropout * 255.0)); - params.rp_dropout = 1.f / params.p_dropout; - params.scale_softmax_rp_dropout = params.rp_dropout * params.scale_softmax; - params.is_bf16 = 0; - params.cu_seqlens_q = cu_seqlens_q_ptr; - params.cu_seqlens_k = cu_seqlens_k_ptr; - params.p_ptr = softmax_ptr; // used for `return_softmax`. 
- params.rng_state = static_cast(rng_state_ptr); - - // print_Flash_fwd_params(params); - - FP16_SWITCH(!params.is_bf16, [&] { - FWD_HEADDIM_SWITCH( - params.d, [&] { run_mha_fwd_(params, stream); }); - }); -} - -void run_mha_bwd(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr, - void *dout_ptr, void *dq_ptr, void *dk_ptr, void *dv_ptr, - int *cu_seqlens_q_ptr, int *cu_seqlens_k_ptr, - void *dq_accum_ptr, void *dk_accum_ptr, void *dv_accum_ptr, - void *softmax_lse_ptr, void *dsoftmax_sum_ptr, - void *rng_state_ptr, - - uint32_t q_batch_stride, uint32_t k_batch_stride, - uint32_t v_batch_stride, uint32_t o_batch_stride, - - uint32_t q_row_stride, uint32_t k_row_stride, - uint32_t v_row_stride, uint32_t o_row_stride, - - uint32_t q_head_stride, uint32_t k_head_stride, - uint32_t v_head_stride, uint32_t o_head_stride, - - uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, - uint32_t d_rounded, float softmax_scale, - - uint32_t seqlen_q, uint32_t seqlen_k, - uint32_t seqlen_q_rounded, uint32_t seqlen_k_rounded, - - float p_dropout, int is_causal, cudaStream_t stream) { - Flash_bwd_params params; - // Reset the parameters - memset(¶ms, 0, sizeof(params)); - - // Set the pointers and strides. - params.q_ptr = q_ptr; - params.k_ptr = k_ptr; - params.v_ptr = v_ptr; - params.o_ptr = o_ptr; - - params.dq_ptr = dq_ptr; - params.dk_ptr = dk_ptr; - params.dv_ptr = dv_ptr; - params.do_ptr = dout_ptr; - - params.dq_accum_ptr = dq_accum_ptr; - params.dk_accum_ptr = dk_accum_ptr; - params.dv_accum_ptr = dv_accum_ptr; - - params.softmax_lse_ptr = softmax_lse_ptr; - - // All stride are in elements, not bytes. - params.q_batch_stride = q_batch_stride; - params.k_batch_stride = k_batch_stride; - params.v_batch_stride = v_batch_stride; - params.o_batch_stride = o_batch_stride; - - params.q_row_stride = q_row_stride; - params.k_row_stride = k_row_stride; - params.v_row_stride = v_row_stride; - params.o_row_stride = o_row_stride; - params.q_head_stride = q_head_stride; - params.k_head_stride = k_head_stride; - params.v_head_stride = v_head_stride; - params.o_head_stride = o_head_stride; - - params.dq_batch_stride = q_batch_stride; - params.dk_batch_stride = k_batch_stride; - params.dv_batch_stride = v_batch_stride; - params.do_batch_stride = o_batch_stride; - - params.dq_row_stride = q_row_stride; - params.dk_row_stride = k_row_stride; - params.dv_row_stride = v_row_stride; - params.do_row_stride = o_row_stride; - params.dq_head_stride = q_head_stride; - params.dk_head_stride = k_head_stride; - params.dv_head_stride = v_head_stride; - params.do_head_stride = o_head_stride; - - // Set the dimensions. - params.b = b; - params.h = h; - params.h_k = h_k; - params.h_h_k_ratio = h / h_k; - params.seqlen_q = seqlen_q; - params.seqlen_k = seqlen_k; - params.seqlen_q_rounded = seqlen_q_rounded; - params.seqlen_k_rounded = seqlen_k_rounded; - params.d = d; - params.d_rounded = d_rounded; - params.is_causal = is_causal; - - // Set the different scale values. 
- params.scale_softmax = softmax_scale; - params.scale_softmax_log2 = softmax_scale * M_LOG2E; - - params.p_dropout = 1.f - p_dropout; // probability to keep - params.p_dropout_in_uint8_t = uint8_t(std::floor(params.p_dropout * 255.0)); - params.rp_dropout = 1.f / params.p_dropout; - params.scale_softmax_rp_dropout = params.rp_dropout * params.scale_softmax; - params.is_bf16 = 0; - params.cu_seqlens_q = cu_seqlens_q_ptr; - params.cu_seqlens_k = cu_seqlens_k_ptr; - params.p_ptr = nullptr; // used for `return_softmax`, no use in bwd - params.dsoftmax_sum = dsoftmax_sum_ptr; - params.rng_state = static_cast(rng_state_ptr); - - // print_Flash_bwd_params(params); - - bool configure = false; - FP16_SWITCH(!params.is_bf16, [&] { - if (params.d <= 32) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 64) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 96) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 128) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 160) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 192) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 224) { - run_mha_bwd_(params, stream, configure); - } else if (params.d <= 256) { - run_mha_bwd_(params, stream, configure); - } - }); -} - -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.h deleted file mode 100644 index 5c6e6f70c..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_api.h +++ /dev/null @@ -1,60 +0,0 @@ -#include "flash.h" -#include - -namespace brt { -namespace cuda { -namespace kernel { - -void print_Qkv_params(Qkv_params ¶ms); -void print_Flash_fwd_params(Flash_fwd_params ¶ms); -void print_Flash_bwd_params(Flash_bwd_params ¶ms); - -void run_mha(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr, - void *softmax_lse_ptr, void *softmax_ptr, void *rng_state_ptr, - - int32_t *cu_seqlens_q_ptr, int32_t *cu_seqlens_k_ptr, - - uint32_t q_batch_stride, uint32_t k_batch_stride, - uint32_t v_batch_stride, uint32_t o_batch_stride, - - uint32_t q_row_stride, uint32_t k_row_stride, - uint32_t v_row_stride, uint32_t o_row_stride, - - uint32_t q_head_stride, uint32_t k_head_stride, - uint32_t v_head_stride, uint32_t o_head_stride, - - uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, - uint32_t d_rounded, float softmax_scale, - - uint32_t seqlen_q, uint32_t seqlen_k, uint32_t seqlen_q_rounded, - uint32_t seqlen_k_rounded, - - float p_dropout, int is_causal, cudaStream_t stream); - -void run_mha_bwd(void *q_ptr, void *k_ptr, void *v_ptr, void *o_ptr, - void *dout_ptr, void *dq_ptr, void *dk_ptr, void *dv_ptr, - int *cu_seqlens_q_ptr, int *cu_seqlens_k_ptr, - void *dq_accum_ptr, void *dk_accum_ptr, void *dv_accum_ptr, - void *softmax_lse_ptr, void *dsoftmax_sum_ptr, - void *rng_state_ptr, - - uint32_t q_batch_stride, uint32_t k_batch_stride, - uint32_t v_batch_stride, uint32_t o_batch_stride, - - uint32_t q_row_stride, uint32_t k_row_stride, - uint32_t v_row_stride, uint32_t o_row_stride, - - uint32_t q_head_stride, uint32_t k_head_stride, - uint32_t v_head_stride, uint32_t o_head_stride, - - uint32_t b, uint32_t h, uint32_t h_k, uint32_t d, - uint32_t d_rounded, float softmax_scale, - - uint32_t seqlen_q, uint32_t seqlen_k, - uint32_t seqlen_q_rounded, uint32_t 
seqlen_k_rounded, - - float p_dropout, int is_causal, cudaStream_t stream); - -} // namespace kernel -} // namespace cuda -} // namespace brt diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_bf16_sm80.cu deleted file mode 100644 index 12ce21036..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_bf16_sm80.cu +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, -// cudaStream_t stream, const bool configure) { -// using elem_type = cutlass::bfloat16_t; -// if (params.h == params.h_k) { -// run_flash_bwd>(params, stream, configure); -// } else { -// run_flash_bwd_seqq_parallel>(params, stream, configure); -// } -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim128(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_fp16_sm80.cu deleted file mode 100644 index 42eaff2ae..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim128_fp16_sm80.cu +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, -// cudaStream_t stream, const bool configure) { -// using elem_type = cutlass::half_t; -// if (params.h == params.h_k) { -// // run_flash_bwd>(params, stream, configure); -// // This is faster, in the case of sequence-parallel bwd (where we need -// fewer registers). -// // Out of these three, the 2nd one is slightly faster (2% faster than the -// first). Idk why. -// // run_flash_bwd>(params, stream, configure); -// run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); } else { -// run_flash_bwd_seqq_parallel>(params, stream, configure); -// } -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim128(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_bf16_sm80.cu deleted file mode 100644 index e01e17733..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_bf16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
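// Editor's note: run_mha and run_mha_bwd in the deleted flash_api.cu above
// dispatch through the static-switch macros from static_switch.h. A trimmed
// sketch of that shape (named *_SKETCH because it is an editor's rendering:
// the vendored header also covers the head-dimension switch and more branches):

#include <cutlass/numeric_types.h>

#define FP16_SWITCH_SKETCH(COND, ...)        \
  [&] {                                      \
    if (COND) {                              \
      using elem_type = cutlass::half_t;     \
      return __VA_ARGS__();                  \
    } else {                                 \
      using elem_type = cutlass::bfloat16_t; \
      return __VA_ARGS__();                  \
    }                                        \
  }()

// Each branch fixes `elem_type` as a compile-time alias, so the lambda body
// instantiates exactly one of the per-file specializations below, e.g.
//   FP16_SWITCH_SKETCH(!params.is_bf16, [&] {
//     run_mha_bwd_<elem_type, 128>(params, stream, configure);
//   });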
- -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim160(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_fp16_sm80.cu deleted file mode 100644 index c1ab3e03b..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim160_fp16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim160(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_bf16_sm80.cu deleted file mode 100644 index c71b4c886..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_bf16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim192(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_fp16_sm80.cu deleted file mode 100644 index 173349878..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim192_fp16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim192(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_bf16_sm80.cu deleted file mode 100644 index 674bc0e07..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_bf16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim224(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_fp16_sm80.cu deleted file mode 100644 index 92913570f..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim224_fp16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim224(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_bf16_sm80.cu deleted file mode 100644 index feaec98d0..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_bf16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim256(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_fp16_sm80.cu deleted file mode 100644 index 60a18a5a4..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim256_fp16_sm80.cu +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim256(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_bf16_sm80.cu deleted file mode 100644 index 809379ee7..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_bf16_sm80.cu +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, -// cudaStream_t stream, const bool configure) { -// using elem_type = cutlass::bfloat16_t; -// run_flash_bwd>(params, stream, configure); -// } - -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim32(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_fp16_sm80.cu deleted file mode 100644 index 2c5414bad..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim32_fp16_sm80.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t -// stream, const bool configure) { -// using elem_type = cutlass::half_t; -// run_flash_bwd>(params, stream, configure); -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim32(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_bf16_sm80.cu deleted file mode 100644 index 4f2af6edc..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_bf16_sm80.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, -// cudaStream_t stream, const bool configure) { -// using elem_type = cutlass::bfloat16_t; -// run_flash_bwd>(params, stream, configure); -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim64(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_fp16_sm80.cu deleted file mode 100644 index d467d89ca..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim64_fp16_sm80.cu +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t -// stream, const bool configure) { -// using elem_type = cutlass::half_t; -// // Changing AtomLayoutMdQ from 2 to 4 takes the same time -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // This is slightly faster. We want to split M more so we need fewer -// registers to store LSE. run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // M=128, N=64 is quite slow, I think because we need to read/write -// dQaccum twice as many times -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); - -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// // run_flash_bwd>(params, stream, configure); -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim64(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_bf16_sm80.cu deleted file mode 100644 index 156f5184c..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_bf16_sm80.cu +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, -// cudaStream_t stream, const bool configure) { -// using elem_type = cutlass::bfloat16_t; -// if (params.h == params.h_k) { -// run_flash_bwd>(params, stream, configure); -// } else { -// run_flash_bwd_seqq_parallel>(params, stream, configure); -// } -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim96(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_fp16_sm80.cu deleted file mode 100644 index bef1b28ba..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_hdim96_fp16_sm80.cu +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_bwd_launch_template.h" - -// template<> -// void run_mha_bwd_(Flash_bwd_params ¶ms, cudaStream_t -// stream, const bool configure) { -// using elem_type = cutlass::half_t; -// if (params.h == params.h_k) { -// // run_flash_bwd>(params, stream, configure); -// // This is very slightly faster -// run_flash_bwd>(params, stream, configure); -// } else { -// run_flash_bwd_seqq_parallel>(params, stream, configure); -// } -// } -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_bwd_(Flash_bwd_params ¶ms, - cudaStream_t stream, - const bool configure) { - run_mha_bwd_hdim96(params, stream, configure); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_kernel.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_kernel.h deleted file mode 100644 index 91bac6590..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_kernel.h +++ /dev/null @@ -1,2004 +0,0 @@ -/*************************************************************************************************** - * Copyright (c) 2023, Tri Dao. - ******************************************************************************/ - -#pragma once - -#include -#include - -#include -#include -#include -#include - -#include "block_info.h" -#include "kernel_traits.h" -#include "philox.cuh" -#include "softmax.h" -#include "utils.h" - -namespace brt { -namespace cuda { -namespace kernel { -namespace flash { - -using namespace cute; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -CUTE_HOST_DEVICE auto -make_tiled_copy_B_warpcontiguousN(Copy_Atom const ©_atom, - TiledMMA const &tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; - // Divide by 2 because right now we always use 2 for the ValLayout - constexpr int kNWarpsN = - decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; - constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; - // This gives the correct layout, idk why. 
- // auto t = make_tile(Layout, _2>, - // Stride, _8> >{}, - // auto t = make_tile(Layout, - // Stride<_1, _64, _8> >{}, - auto t = make_tile( - Layout, Int, _2>, // (8, 2, 2) or (8, 4, - // 2) - Stride<_1, Int, _8>>{}, // (1, 64, 8) or (1, 32, 8) - make_layout(size<2>(TileShape_MNK{}))); - // if (cute::thread0()) {printf("make_tiled_copy_B_warpcontiguousN "); - // print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutB_TV(), t); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -CUTE_HOST_DEVICE auto -make_tiled_copy_C_warpcontiguousN(Copy_Atom const ©_atom, - TiledMMA const &tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; - // Divide by 2 because right now we always use 2 for the ValLayout - constexpr int kNWarpsN = - decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; - constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; - auto t = make_tile( - make_layout(size<0>(TileShape_MNK{})), - Layout, Int, _2>, // (8, 2, 2) or (8, 4, - // 2) - Stride<_1, Int, _8>>{}); // (1, 64, 8) or (1, 32, 8) - // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousN "); - // print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -dot_do_o(Tensor const &do_, Tensor const &o, - Tensor &dP_sum, Tensor &sdPsum, - const int gdP_col_stride, const float scale) { - static_assert(Layout0::rank == 3, "Only support 3D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(do_.layout() == o.layout()); - // Reshape do_ and o from (8, kBlockM / 32, kHeadDim / 64) to (kBlockM / 32, 8 - // * kHeadDim / 64) The last coordinate is the "page". - Tensor do_reshaped = make_tensor( - do_.data(), - make_layout(get<1>(do_.layout()), - make_layout(get<0>(do_.layout()), get<2>(do_.layout())))); - Tensor o_reshaped = make_tensor(o.data(), do_reshaped.layout()); - Tensor do_fp32 = flash::convert_type(do_reshaped); - Tensor o_fp32 = flash::convert_type(o_reshaped); -#pragma unroll - for (int mi = 0; mi < size<0>(do_reshaped); ++mi) { - float dP_sum_cur = do_fp32(mi, 0) * o_fp32(mi, 0); -#pragma unroll - for (int ni = 1; ni < size<1>(do_reshaped); ni++) { - dP_sum_cur += do_fp32(mi, ni) * o_fp32(mi, ni); - } - flash::SumOp sum_op; - dP_sum_cur = - flash::Allreduce::run(dP_sum_cur, sum_op) * scale; - if (threadIdx.x % THREADS_PER_ROW == 0) { - dP_sum(mi * gdP_col_stride + threadIdx.x / THREADS_PER_ROW) = dP_sum_cur; - // recast(sdPsum)(mi * gdP_col_stride + threadIdx.x / - // THREADS_PER_ROW) = dP_sum; - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Just compute dot(do, o) and write the result (softmax_d) to global memory as -// a separate kernel. This is used in the case where we want to parallelize the -// backward across seqlen_k. -template -inline __device__ void compute_dot_do_o(const Params ¶ms) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - const int m_block = blockIdx.x; - // The block index for the batch. 
- const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q) - return; - - const index_t row_offset_do = - binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + - m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; - const index_t row_offset_o = - binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + - m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - const index_t row_offset_dq_accum = - ((bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM) * - params.d_rounded; - const index_t row_offset_dpsum = - (bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM; - - Tensor gdO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdQaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + - row_offset_dq_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor dP_sum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + - row_offset_dpsum), - Shape>{}, Stride<_1>{}); - - typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - // TODO: careful, we're zeroing out dQaccum with type float4, but when - // we do atomicAdds, we use type float. The layouts are different. Check this. - typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); - - Tensor cdO = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdOcdO = gmem_thr_copy_dO.partition_S(cdO); - - // Allocate predicate tensors for k - Tensor tdOpdO = make_tensor(make_shape(size<2>(tdOgdO))); -// Set predicates for k bounds -#pragma unroll - for (int k = 0; k < size(tdOpdO); ++k) { - tdOpdO(k) = get<1>(tdOcdO(0, 0, k)) < params.d; - } - - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tdOcdO, tdOpdO, - binfo.actual_seqlen_q - m_block * kBlockM); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tdOcdO, tdOpdO, - binfo.actual_seqlen_q - m_block * kBlockM); - // By right we need to scale dP up by 1/p_dropout, but instead we don't and - // only scale the final results (dQ and dK) by 1/p_dropout. So we need to keep - // dP_sum scaled down by p_dropout here, so that (dP - dP_sum) is on the same - // scale. 
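// Editor's note: a scalar host-side reference (illustrative only) for what
// dot_do_o computes per query row: softmax_d[i] = scale * sum_k dO[i][k] *
// O[i][k]. As the comment above explains, scale here is the keep probability
// (params.p_dropout after the 1 - p rewrite in flash_api.cu), which keeps dP
// and dP_sum on the same scale.

#include <cstddef>

inline void reference_dot_do_o(const float *dO, const float *O,
                               float *softmax_d, std::size_t rows,
                               std::size_t head_dim, float scale) {
  for (std::size_t i = 0; i < rows; ++i) {
    float acc = 0.f;
    for (std::size_t k = 0; k < head_dim; ++k) {
      acc += dO[i * head_dim + k] * O[i * head_dim + k];
    }
    // The kernel computes this row sum with a warp-level Allreduce instead.
    softmax_d[i] = acc * scale;
  }
}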
- dot_do_o( - tdOrdO, tdOrO, dP_sum, dP_sum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), - params.p_dropout); - if (Clear_dQaccum) { - Tensor zero = make_fragment_like(tdQgdQaccum); - clear(zero); - cute::copy(gmem_tiled_copy_dQaccum, zero, tdQgdQaccum); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void clear_dKVaccum(const Params ¶ms) { - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - const int n_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k) - return; - - const index_t row_offset_dkv_accum = - ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + - n_block * kBlockN) * - params.d_rounded; - - Tensor gdKaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gdVaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - - typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); - Tensor zero = make_fragment_like(tdKgdKaccum); - clear(zero); - cute::copy(gmem_tiled_copy_dKVaccum, zero, tdKgdKaccum); - cute::copy(gmem_tiled_copy_dKVaccum, zero, tdVgdVaccum); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Convert dQ from dQaccum (in float) to fp16/bf16. -// This is used in the case where we want to parallelize the backward across -// seqlen_k. -template -inline __device__ void convert_dQ(const Params ¶ms) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. 
- const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q) - return; - - const index_t row_offset_dq = - binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + - m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; - const index_t row_offset_dq_accum = - ((bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM) * - params.d_rounded; - - Tensor gdQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); - Tensor gdQaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + - row_offset_dq_accum), - Shape, Int>{}, Stride, _1>{}); - - Tensor sdQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutdQ{}); - - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto smem_tiled_copy_dQ = - make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQsdQ = - smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - Tensor tdQsdQ = - gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_S(gdQaccum); - - Tensor acc_dq = partition_fragment_C( - tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K - CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); - - Tensor tdQrdQaccum = make_fragment_like(tdQgdQaccum); - cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, tdQrdQaccum); -#pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { - acc_dq(i) = tdQrdQaccum(i) * params.scale_softmax_rp_dropout; - } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - Tensor taccdQrdQ = - smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - __syncthreads(); - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - - Tensor cdQ = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); - Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); -#pragma unroll - for (int k = 0; k < size(tdQpdQ); ++k) { - tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy(gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, - tdQpdQ, - binfo.actual_seqlen_q - m_block * kBlockM); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Convert dK and dV from dKaccum and dVaccum (in float) to fp16/bf16. -// This is used in the case where we want to parallelize the backward across -// seqlen_q. 
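// Editor's note: a scalar reference (illustrative only) for the conversion
// kernels here: read the float accumulator, apply the scale that was deferred
// during the main backward pass, and narrow to the output element type.
// convert_dQ and the dK path use params.scale_softmax_rp_dropout, while the
// dV path uses params.rp_dropout, since dV never picks up the softmax scale.

#include <cstddef>

template <typename Element> // e.g. cutlass::half_t or cutlass::bfloat16_t
inline void reference_convert_accum(const float *accum, Element *out,
                                    std::size_t n, float scale) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = static_cast<Element>(accum[i] * scale); // fp32 -> fp16/bf16
  }
}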
-template -inline __device__ void convert_dKV(const Params ¶ms) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - const int n_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k) - return; - - const index_t row_offset_dk = - binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + - n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = - binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + - n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - const index_t row_offset_dkv_accum = - ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + - n_block * kBlockN) * - params.d_rounded; - - Tensor gdK = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - Tensor gdKaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gdVaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - - Tensor sdK = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutdKV{}); - Tensor sdV = - make_tensor(sdK.data() + size(sdK), - typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd - gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); - - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto smem_tiled_copy_dKV = make_tiled_copy_C( - typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); - auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor taccdKsdK = - smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor taccdVsdV = - smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - Tensor tdKsdK = - gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVsdV = - gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_S(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_S(gdVaccum); - - Tensor acc_dk = partition_fragment_C( - tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - Tensor acc_dv = partition_fragment_C( - tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - CUTE_STATIC_ASSERT_V(size(acc_dk) == size(tdKgdKaccum)); - CUTE_STATIC_ASSERT_V(size(acc_dv) == size(tdVgdVaccum)); - - Tensor tdKrdKaccum = 
make_fragment_like(tdKgdKaccum); - Tensor tdVrdVaccum = make_fragment_like(tdVgdVaccum); - cute::copy(gmem_tiled_copy_dKVaccum, tdKgdKaccum, tdKrdKaccum); - cute::copy(gmem_tiled_copy_dKVaccum, tdVgdVaccum, tdVrdVaccum); -#pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { - acc_dk(i) = tdKrdKaccum(i) * params.scale_softmax_rp_dropout; - } -#pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { - acc_dv(i) = tdVrdVaccum(i) * params.rp_dropout; - } - // Convert acc_dk from fp32 to fp16 - Tensor rdK = flash::convert_type(acc_dk); - Tensor rdV = flash::convert_type(acc_dv); - Tensor taccdKrdK = - smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdVrdV = - smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); - cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); - __syncthreads(); - Tensor tdKrdK = make_tensor(shape(tdKgdK)); - Tensor tdVrdV = make_tensor(shape(tdVgdV)); - cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); - cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); - - Tensor cdKV = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); - Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); -#pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { - tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy(gmem_tiled_copy_dKV, tdKrdK, tdKgdK, - tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); - flash::copy(gmem_tiled_copy_dKV, tdVrdV, tdVgdV, - tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -compute_dq_dk_dv_1colblock(const Params ¶ms, const int bidb, const int bidh, - const int n_block) { - - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - // The thread index. 
- const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - // constexpr int kNWarps = Kernel_traits::kNWarps; - constexpr int MMA_N_SdP = - kBlockN / - decltype(size<1>( - typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; - constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; - constexpr bool Double_buffer = !Kernel_traits::No_double_buffer; - - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k || binfo.actual_seqlen_q == 0) - return; - - int m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM); - - const index_t row_offset_q = - binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + - (m_block_max - 1) * kBlockM * params.q_row_stride + - bidh * params.q_head_stride; - const index_t row_offset_k = - binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + - n_block * kBlockN * params.k_row_stride + - (bidh / params.h_h_k_ratio) * params.k_head_stride; - const index_t row_offset_v = - binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + - n_block * kBlockN * params.v_row_stride + - (bidh / params.h_h_k_ratio) * params.v_head_stride; - const index_t row_offset_do = - binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + - (m_block_max - 1) * kBlockM * params.do_row_stride + - bidh * params.do_head_stride; - const index_t row_offset_o = - binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + - (m_block_max - 1) * kBlockM * params.o_row_stride + - bidh * params.o_head_stride; - const index_t row_offset_dq = - binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + - (m_block_max - 1) * kBlockM * params.dq_row_stride + - bidh * params.dq_head_stride; - const index_t row_offset_dq_accum = - ((bidb * params.h + bidh) * params.seqlen_q_rounded + - (m_block_max - 1) * kBlockM) * - params.d_rounded; - const index_t row_offset_lse = - (bidb * params.h + bidh) * params.seqlen_q + (m_block_max - 1) * kBlockM; - const index_t row_offset_dpsum = - (bidb * params.h + bidh) * params.seqlen_q_rounded + - (m_block_max - 1) * kBlockM; - - Tensor gQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), - Shape, Int>{}, - make_stride(params.q_row_stride, _1{})); - Tensor gK = make_tensor( - make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), - Shape, Int>{}, - make_stride(params.k_row_stride, _1{})); - Tensor gV = make_tensor( - make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), - Shape, Int>{}, - make_stride(params.v_row_stride, _1{})); - Tensor gdO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); - Tensor gdQaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + - row_offset_dq_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gLSE = make_tensor( - make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + - row_offset_lse), - Shape>{}, Stride<_1>{}); - Tensor gdPsum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + - row_offset_dpsum), - Shape>{}, Stride<_1>{}); - - 
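// Illustrative sketch (not from the original kernel): every row offset above
// composes the same three terms, namely a batch base (via binfo.q_offset /
// binfo.k_offset), the rows skipped by the current M or N block, and a head
// base. A minimal scalar model of the dq offset, assuming the non-varlen case
// where binfo.q_offset(batch_stride, row_stride, bidb) reduces to
// bidb * batch_stride; the helper name is hypothetical.
#include <cstdint>
inline int64_t dq_row_offset_model(int64_t batch_stride, int64_t row_stride,
                                   int64_t head_stride, int bidb, int bidh,
                                   int m_block, int kBlockM) {
  return int64_t(bidb) * batch_stride             // start of this batch
       + int64_t(m_block) * kBlockM * row_stride  // rows before this M block
       + int64_t(bidh) * head_stride;             // start of this head
}
// The *_accum offsets differ only in that each (batch, head) slice is stored
// contiguously at the rounded sizes, hence the
// ((bidb * h + bidh) * seqlen_q_rounded + m_block * kBlockM) * d_rounded form.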
Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sQt = - make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sQtNoSwizzle = make_tensor( - sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - // Double buffer for sQ - Tensor sdO = make_tensor(sQ.data() + (Double_buffer ? 2 : 1) * size(sQ), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sdOt = make_tensor(sdO.data(), - typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sdOtransposedNoSwizzle = make_tensor( - sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sK = make_tensor(sdO.data() + size(sdO), - typename Kernel_traits::SmemLayoutKV{}); - Tensor sV = - make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); - Tensor sKt = - make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); - Tensor sKtNoSwizzle = make_tensor( - sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); - Tensor sdS = make_tensor(!Kernel_traits::Is_V_in_regs ? sV.data() + size(sV) - : sK.data() + size(sK), - typename Kernel_traits::SmemLayoutPdS{}); - Tensor sdSt = make_tensor(sdS.data(), - typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sdStNoSwizzle = make_tensor( - sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sP = make_tensor(sdS.data() + size(sdS), - typename Kernel_traits::SmemLayoutPdS{}); - Tensor sPt = - make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sPtNoSwizzle = make_tensor( - sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - // sP and sdQ share the same memory so be careful - Tensor sdQ = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutdQ{}); - Tensor sdPsum = - make_tensor(make_smem_ptr(reinterpret_cast( - (sP.data() + cute::max(size(sP), size(sdQ))).get())), - Shape>{}); - - typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; - auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); - using GmemTiledCopydO = - std::conditional_t; - GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - using GmemLayoutAtomdQaccum = - std::conditional_t; - GmemLayoutAtomdQaccum gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); - Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) - Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); - Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) - Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); - Tensor tdQsdQ = - gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); - // if (cute::thread0()) { print(tdQgdQaccum.layout()); printf("\n"); } - // __syncthreads(); - // if (blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 && tidx < 64) { - // printf("tidx = %d, tdQgdQaccum = 0x%p\n", tidx, tdQgdQaccum.data()); - 
// } - - typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; - auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); - Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) - Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) - Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) - Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) - - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); - Tensor tdKrdSt = - thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdKrQt = - thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) - Tensor tdVrPt = - thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdVrdO = thr_mma_dkv.partition_fragment_B( - sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); - Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) - Tensor tdQrKt = - thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) - - Tensor acc_dk = partition_fragment_C( - tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - Tensor acc_dv = partition_fragment_C( - tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - - // - // Copy Atom retiling - // - - auto smem_tiled_copy_QdO = - make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); - Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); - Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); - - // auto smem_thr_copy_KV = make_tiled_copy_B(typename - // Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp).get_thread_slice(tidx); - auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN( - typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); - Tensor tSsK = smem_thr_copy_KV.partition_S(sK); - // if (cute::thread(0, 0) && n_block == 0) { printf("sK layout: "); - // print(sK.layout()); printf("\n"); } if (cute::thread(0, 0) && n_block == 0) - // { print(tSsK.layout()); printf("\n"); } - Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); - - // Partition sP and sdS to match the accumulator partitioning - // This has to be tiled_mma_sdp, not tiled_mma_dkv - // auto smem_thr_copy_PdS = make_tiled_copy_C(typename - // Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp).get_thread_slice(tidx); - auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN( - typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); - auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); - Tensor tPsP = - smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) - // if (cute::thread(0, 0) && n_block == 0) { printf("sP layout: "); - // print(sP.layout()); printf("\n"); } if (cute::thread(0, 0) && n_block == 0) - // { print(tPsP.layout()); printf("\n"); } if (n_block == 0 && blockIdx.x == 0 - // && blockIdx.y == 0 && tidx < 64) { - // printf("tidx=%d, tPsP = 0x%p\n", tidx, tPsP.data()); - // } - Tensor tdSsdS = - smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - auto smem_tiled_copy_PdSt = make_tiled_copy_A( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); - Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); - Tensor tdKsdSt = 
smem_thr_copy_PdSt.partition_S(sdSt); - - auto smem_tiled_copy_QdOt = make_tiled_copy_B( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); - Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); - Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); - - auto smem_tiled_copy_dS = - make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); - auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); - Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); - - auto smem_tiled_copy_Kt = make_tiled_copy_B( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); - auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); - Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); - - auto smem_tiled_copy_dQ = - make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQsdQ = - smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - // - // PREDICATES - // - - Tensor cQ = make_identity_tensor( - make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor cKV = make_identity_tensor( - make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tQcQ = gmem_thr_copy_QKV.partition_D(cQ); - Tensor tKVcKV = gmem_thr_copy_QKV.partition_D(cKV); - - // Allocate predicate tensors for k - Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); - Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); - - // Set predicates for k bounds - if (!Is_even_K) { -#pragma unroll - for (int k = 0; k < size(tQpQ); ++k) { - tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; - } -#pragma unroll - for (int k = 0; k < size(tKVpKV); ++k) { - tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; - } - } - - // Prologue - - // We'll advance gdQ and gdQaccum before the 1st read/write. - tdQgdQ.data() = tdQgdQ.data() + kBlockM * params.dq_row_stride; - tdQgdQaccum.data() = tdQgdQaccum.data() + kBlockM * params.d_rounded; - - int m_block = m_block_max - 1; - int m_block_min = !Is_causal ? 0 : (n_block * kBlockN) / kBlockM; - - // We might need to exit early and write 0 to dK and dV. - // Otherwise we get wrong result for the case where we don't enter the for - // loop. And we might read OOB elements from gQ and gdO. - // TODO: what if we're not parallelizing, do we need to compute dot_do_o? 
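// Illustrative sketch (not from the original kernel) of the early-exit test
// below: under causal masking, a K/V column block starting at key row
// n_block * kBlockN only receives gradient from query rows at or past that
// point, so the first M block that can contribute is
// (n_block * kBlockN) / kBlockM. If even the loop start m_block_max - 1 sits
// below that, no iteration runs and dK/dV must be zeroed explicitly.
inline bool causal_col_block_is_dead(int n_block, int kBlockN, int kBlockM,
                                     int m_block_max) {
  const int m_block_min = (n_block * kBlockN) / kBlockM; // first overlapping M block
  return (m_block_max - 1) < m_block_min;                // loop body never executes
}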
- if (Is_causal && m_block < m_block_min) { - const index_t row_offset_dk = - binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + - n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = - binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + - n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - Tensor gdK = - make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + - row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = - make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + - row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - Tensor tdKrdK = make_tensor(shape(tdKgdK)); - Tensor tdVrdV = make_tensor(shape(tdVgdV)); - clear(tdKrdK); - clear(tdVrdV); - Tensor cdKV = make_identity_tensor(make_shape( - size<0>(gdK), size<1>(gdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); - Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); -#pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { - tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); - flash::copy( - gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); - return; - } - - if (Double_buffer && m_block % 2 == 1) { // Double buffer for sQ - tQsQ.data() = tQsQ.data() + size(sQ); - tSsQ.data() = tSsQ.data() + size(sQ); - tdKsQt.data() = tdKsQt.data() + size(sQ); - } - - if (!Is_first && !Seq_parallel) { - __syncthreads(); - } - - if (Kernel_traits::Is_V_in_regs) { - // Clear the smem tiles to account for predicated off loads - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - flash::cp_async_fence(); - } - - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - if (!Is_first) { - // Clear the smem tiles to account for predicated off loads - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - } else { - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - } - flash::copy( - gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - - Tensor caccS = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) - Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) - static_assert(decltype(size<0>(taccScS))::value == 4); - // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. - Tensor taccScS_row = - logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); - Tensor lse = make_tensor( - Shape>{}); -#pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - // Using uint32_t row makes it 10us slower on d=128, not sure why. 
- const int row = get<0>(taccScS_row(mi)); - lse(mi) = Is_even_MN || row < binfo.actual_seqlen_q - m_block * kBlockM - ? gLSE(row) - : 0; - } - - // Tensor tKrK = make_fragment_like(tKsK); - // // cute::copy(gmem_tiled_copy_QKV, tKgK(_, _, _, 0), tKrK); - // cute::copy(gmem_tiled_copy_QKV, tKgK, tKrK); - // // if (cute::thread(1, 0)) { print(tKrK); } - - flash::copy( - gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - if (!Kernel_traits::Is_V_in_regs) { - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - } - flash::cp_async_fence(); - - // if (cute::thread0()) { print(tdOgdO.layout()); printf("\n"); print(tdOrdO); - // print(tdOrO); } - if (Is_first) { - cute::copy(tdOrdO, tdOsdO); - dot_do_o( - tdOrdO, tdOrO, gdPsum, sdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), - params.p_dropout); - } - - if (Kernel_traits::Is_V_in_regs) { - cute::cp_async_wait<1>(); - __syncthreads(); - Tensor tdPrV_copy_view = smem_thr_copy_KV.retile_D(tdPrV); - CUTE_STATIC_ASSERT_V(size<1>(tdPsV) == size<1>(tdPrV_copy_view)); // M - cute::copy(smem_tiled_copy_KV, tdPsV, tdPrV_copy_view); - } - - // auto seed = params.rng_state[0]; - // auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % - // 32; - - // deprecated: no rng support. - // unsigned long long seed = 0; - // unsigned long long offset = 0; - - unsigned long long seed = params.rng_state[0]; - unsigned long long offset = - params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; - - // if (block_id == 0 && tidx == 0) { - // printf("seed:%lu\n",seed); - // printf("offset:%lu\n",offset); - // } - - clear(acc_dv); - clear(acc_dk); - - for (; m_block >= m_block_min; --m_block) { - Tensor acc_s = partition_fragment_C( - tiled_mma_sdp, - Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - clear(acc_s); - cute::cp_async_wait<0>(); - __syncthreads(); - - Tensor dP_sum = make_fragment_like(lse); -#pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - dP_sum(mi) = gdPsum(get<0>(taccScS_row(mi))); - } - - // if (cute::thread0()) { print(sK); } - // Tensor tSrK_copy_view = smem_thr_copy_KV.retile_D(tSrK); - // #pragma unroll - // for (int k = 0; k < size<2>(tSrK_copy_view); ++k) { - // cute::copy(smem_tiled_copy_KV, tSsK(_, _, k), tSrK_copy_view(_, _, - // k)); - // } - // if (cute::thread0()) { print(tSrK); } - flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, - smem_thr_copy_KV); - - // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, - // MMA_N)) - Tensor scores = make_tensor( - acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - // if (cute::thread(32, 0)) { print(scores); } - // TD [2023-07-29]: I was thinking that we don't need to mask out the - // elements beyond actual_seqlen_k, because acc_s would be some finite value - // for those indices. In the end when we multiply with K to get dQ, the - // corresponding values of K would be 0, so the result would still be - // correct. However, it's possible that the values in acc_s are so large - // that they overflow when we multiply with dP and convert to fp16, - // resulting in Inf in dS and NaNs in dQ. So we need to mask out the - // elements beyond actual_seqlen_k. 
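// Illustrative check (not from the original kernel) of the failure mode the
// comment above describes: fp16's largest finite value is 65504, so a large
// unmasked score saturates to Inf on conversion, and a later Inf * 0 or
// Inf - Inf turns into NaN in dS and dQ.
#include <cuda_fp16.h>
__host__ __device__ inline bool poisons_fp16(float score) {
  const float roundtrip = __half2float(__float2half(score));
  // Inf is the only roundtrip result outside fp16's finite range
  return roundtrip > 65504.0f || roundtrip < -65504.0f; // e.g. true for 1e6f
}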
- if (!Is_causal) { - if (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k) { - flash::apply_mask(scores, binfo.actual_seqlen_k, - n_block * kBlockN + - (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16); - } - } else { - // Putting this causal masking right after acc_s is *much* slower for some - // reason. TD [2023-08-16]: We need the 2nd condition because if seqlen_q - // is long and seqlen_k is short (e.g., 256 and 2), the 2nd block of - // seqlen_q (from 128 to 255), we're not doing causal masking. But we - // still want to mask out elements beyond actual_seqlen_k. - if (m_block * kBlockM < (n_block + 1) * kBlockN || - (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { - flash::apply_mask_causal( - scores, - n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), - // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % - // AtomLayoutMS * 16 + (tidx % 32) / 4, - AtomLayoutMS * 16); - } - } - // if (cute::thread(32, 0)) { print(scores); } - // Compute the exponential value. - flash::scale_apply_exp2(scores, lse, - params.scale_softmax_log2); - if (Is_dropout) { - uint32_t warp_id = tidx / 32; - uint32_t block_row_idx = - m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; - // Need col to be multiples of 32, since we're doing dropout with block of - // 16 x 32 - static_assert(MMA_N_SdP % 2 == 0); - uint32_t block_col_idx = - n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); - Tensor scores_dropped = make_tensor( - scores.data(), - flash::convert_layout_rowcol_Aregs( - scores.layout())); - flash::apply_dropout( - scores_dropped, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, AtomLayoutMS); - } - // Convert scores from fp32 to fp16/bf16 - Tensor rP = !Is_dropout ? flash::convert_type(scores) - : flash::convert_type_relu(scores); - // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, - // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using - // m16n8k8. - Tensor tPrP = make_tensor( - rP.data(), - flash::convert_layout_rowcol_Aregs( - rP.layout())); - Tensor tPaP = - smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); - // if (cute::thread0()) { print(tPaP); } - // __syncthreads(); - // if (cute::thread0()) { print(sP); } - - Tensor acc_dp = partition_fragment_C( - tiled_mma_sdp, - Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA - - clear(acc_dp); - // Tensor acc_dp_reshaped = make_tensor(acc_dp.data(), - // flash::convert_layout_acc_rowcol(acc_dp.layout())); #pragma unroll for - // (int mi = 0; mi < size<0>(acc_dp_reshaped); ++mi) { - // #pragma unroll - // for (int ni = 0; ni < size<1>(acc_dp_reshaped); ++ni) { - // acc_dp_reshaped(mi, ni) = -dP_sum(mi); - // } - // } - - // if (cute::thread0()) { print(dP_sum); } - - flash::gemm( - acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, - smem_thr_copy_KV); - - // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, - // MMA_N)) - Tensor dS = make_tensor(acc_dp.data(), scores.layout()); - auto pointwise_mult = [](float p, float dp, float d) { - return p * (!Is_dropout || p >= 0 ? 
dp - d : d); - }; -#pragma unroll - for (int mi = 0; mi < size<0>(dS); ++mi) { -#pragma unroll - for (int ni = 0; ni < size<1>(dS); ++ni) { - dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); - } - } - // if (cute::thread0()) { print(dS); } - - Tensor acc_dq = partition_fragment_C( - tiled_mma_dq, - Shape, Int>{}); // MMA, MMA_N, MMA_K - tdQgdQaccum.data() = - tdQgdQaccum.data() + (-int(kBlockM * params.d_rounded)); - if (Is_first || Seq_parallel) { - clear(acc_dq); - } else { - // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum - Tensor acc_dq_reshaped = - make_tensor(acc_dq.data(), make_layout(get<0>(acc_dq.layout()), - get<2>(acc_dq.layout()), - get<1>(acc_dq.layout()))); - cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, acc_dq_reshaped); - } - - if (Double_buffer && m_block > m_block_min) { - // Double buffer for sQ - const int sQ_offset = m_block % 2 == 0 ? size(sQ) : -size(sQ); - tQsQ.data() = tQsQ.data() + sQ_offset; - tSsQ.data() = tSsQ.data() + sQ_offset; - // Advance gQ - tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tQgQ, - tQsQ, tQcQ, tQpQ); - flash::cp_async_fence(); - } - - Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); - // Convert dS from fp32 to fp16 - Tensor tdSrdS = flash::convert_type(dS_reshaped); - // if (cute::thread0()) { print(tPrP); } - Tensor tdSadS = - smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); - __syncthreads(); - - // Layout p_l = tPrP.layout(); - // Tensor tdVrPt = make_tensor(tPrP.data(), make_layout(get<0>(p_l), - // get<2>(p_l), get<1>(p_l))); flash::gemm_A_in_regs(acc_dv, tdVrPt, tdVrdO, - // tdVsdOt, tiled_mma_dkv, smem_thr_copy_QdOt); Tensor tdKrdSt = - // make_tensor(tdSrdS.data(), tdVrPt.layout()); - // flash::gemm_A_in_regs(acc_dk, tdKrdSt, tdKrQt, tdKsQt, tiled_mma_dkv, - // smem_thr_copy_QdOt); - flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, - smem_thr_copy_QdOt); - // if (cute::thread0() && n_block == 0 && m_block == 0) { print(tdVrPt); } - // if (cute::thread0()) { print(acc_dv); } - - __syncthreads(); // Need syncthreads since we're writing to the same sdO - // location - - if (m_block > m_block_min) { - // Advance gdO - tdOgdO.data() = tdOgdO.data() + (-int(kBlockM * params.do_row_stride)); - if (Is_first) { - tdOgO.data() = tdOgO.data() + (-int(kBlockM * params.o_row_stride)); - flash::copy(gmem_tiled_copy_dO, tdOgdO, - tdOrdO, tQcQ, tQpQ); - flash::copy(gmem_tiled_copy_dO, tdOgO, - tdOrO, tQcQ, tQpQ); - } else { - flash::copy(gmem_tiled_copy_dO, tdOgdO, - tdOsdO, tQcQ, tQpQ); - flash::cp_async_fence(); - } - } - - flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, - smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, - smem_thr_copy_Kt); - // if (cute::thread0()) { print(acc_dq); } - - if (m_block > m_block_min) { - gLSE.data() = gLSE.data() + (-int(kBlockM)); -#pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - lse(mi) = gLSE(get<0>(taccScS_row(mi))); - } - gdPsum.data() = gdPsum.data() + (-int(kBlockM)); - // if (!Is_first && tidx < kBlockM / 2) { - // sdPsum(tidx) = recast(gdPsum)(tidx); - // if (!Is_first && tidx < kBlockM) { - // recast(sdPsum)(tidx) = gdPsum(tidx); - // } - } - - if (!Is_last) { - // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum - Tensor acc_dq_reshaped = - make_tensor(acc_dq.data(), 
make_layout(get<0>(acc_dq.layout()), - get<2>(acc_dq.layout()), - get<1>(acc_dq.layout()))); - if (!Seq_parallel) { - cute::copy(gmem_tiled_copy_dQaccum, acc_dq_reshaped, tdQgdQaccum); - } else { - // if (cute::thread0()) { print(acc_dq.layout()); printf("\n"); - // print(acc_dq_reshaped.layout()); printf("\n"); - // print(tdQgdQaccum.layout()); printf("\n"); } - CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); -#pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { - atomicAdd(&tdQgdQaccum(i), acc_dq(i)); - } - } - } else { -#pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { - acc_dq(i) *= params.scale_softmax_rp_dropout; - } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - Tensor taccdQrdQ = - smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - } - - flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, - smem_thr_copy_QdOt); - // if (cute::thread0()) { print(acc_dk); } - if (Double_buffer) { // Double buffer for sQ - tdKsQt.data() = tdKsQt.data() + (m_block % 2 == 0 ? size(sQ) : -size(sQ)); - } - if (!Double_buffer && m_block > m_block_min) { - __syncthreads(); - // Advance gQ - tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tQgQ, - tQsQ, tQcQ, tQpQ); - flash::cp_async_fence(); - } - - if (Is_first && m_block > m_block_min) { - cute::copy(tdOrdO, tdOsdO); - dot_do_o( - tdOrdO, tdOrO, gdPsum, sdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), - params.p_dropout); - } - - if (Is_last) { - __syncthreads(); - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - tdQgdQ.data() = tdQgdQ.data() + (-int(kBlockM * params.dq_row_stride)); - Tensor cdQ = make_identity_tensor( - Shape, - Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); -#pragma unroll - for (int m = 0; m < size<1>(tdQgdQ); ++m) { - if (Is_even_MN || get<0>(tdQcdQ(0, m, 0)) < - binfo.actual_seqlen_q - m_block * kBlockM) { - cute::copy(gmem_tiled_copy_dQ, tdQrdQ(_, m, _), tdQgdQ(_, m, _)); - } - } - } - } - - // Epilogue - - if (Is_dropout) { -#pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { - acc_dv(i) *= params.rp_dropout; - } - } -#pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { - acc_dk(i) *= params.scale_softmax_rp_dropout; - } - - // Convert acc_dv from fp32 to fp16 - Tensor rdK = flash::convert_type(acc_dk); - Tensor rdV = flash::convert_type(acc_dv); - - Tensor sdK = make_tensor( - sK.data(), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - Tensor sdV = - make_tensor(sdK.data() + size(sdK), - typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - - // Partition sdV and sdK to match the accumulator partitioning - auto smem_tiled_copy_dKV = make_tiled_copy_C( - typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); - auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor taccdKrdK = - smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdKsdK = - smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor taccdVrdV = - smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdVsdV = - smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - // We need syncthreads here since we're writing to the same location 
as sK and - // sV. Without syncthreads, some thread might modify the location of sK while - // another thread is reading it for dQ gemm, leading to a race condition. If - // Is_last, there's already a __syncthreads() at the end of the loop. - if (!Is_last) { - __syncthreads(); - } - - cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); - cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); - - const index_t row_offset_dk = - binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + - n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = - binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + - n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - Tensor gdK = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - - typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor tdKsdK = - gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVsdV = - gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - - __syncthreads(); - Tensor tdKrdK = make_tensor(shape(tdKgdK)); - cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); - Tensor tdVrdV = make_tensor(shape(tdVgdV)); - cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); - Tensor cdKV = make_identity_tensor( - make_shape(size<0>(sdK), size<1>(sdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); - Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); -#pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { - tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy(gmem_tiled_copy_dKV, tdKrdK, tdKgdK, - tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); - flash::copy(gmem_tiled_copy_dKV, tdVrdV, tdVgdV, - tdKVcdKV, tdKVpdKV, - binfo.actual_seqlen_k - n_block * kBlockN); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -compute_dq_dk_dv_1rowblock(const Params ¶ms, const int bidb, const int bidh, - const int m_block) { - - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - // The thread index. 
- const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - // constexpr int kNWarps = Kernel_traits::kNWarps; - constexpr int MMA_N_SdP = - kBlockN / - decltype(size<1>( - typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; - constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) - return; - - int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); - if (Is_causal) { - n_block_max = - std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM, kBlockN)); - } - - // We iterate over the blocks in reverse order. This is because the last block - // is the only one that needs masking when we read K and V from global memory. - // Moreover, iterating in reverse might save us 1 register (we just need - // n_block instead of both n_block and n_block_max). - - const index_t row_offset_q = - binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + - m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; - // We move K and V to the last block. - const index_t row_offset_k = - binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + - (n_block_max - 1) * kBlockN * params.k_row_stride + - (bidh / params.h_h_k_ratio) * params.k_head_stride; - const index_t row_offset_v = - binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + - (n_block_max - 1) * kBlockN * params.v_row_stride + - (bidh / params.h_h_k_ratio) * params.v_head_stride; - const index_t row_offset_do = - binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + - m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; - const index_t row_offset_o = - binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + - m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - // We'll advance gdKaccum and gdVaccum before the first write. 
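// Illustrative sketch (not from the original kernel) of the pointer walk set
// up below: the accumulator offset starts one block past the data, at
// n_block_max * kBlockN rows, and the main loop subtracts kBlockN * d_rounded
// before each write, so the i-th iteration (starting at
// n_block = n_block_max - 1) lands exactly on its own block.
#include <cstdint>
inline int64_t dkv_accum_cursor(int64_t start_offset, int decrements_so_far,
                                int kBlockN, int64_t d_rounded) {
  // start_offset corresponds to row n_block_max * kBlockN; after i pre-write
  // decrements the cursor sits at row (n_block_max - i) * kBlockN
  return start_offset - int64_t(decrements_so_far) * kBlockN * d_rounded;
}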
- const index_t row_offset_dkv_accum = - ((bidb * params.h_k + (bidh / params.h_h_k_ratio)) * - params.seqlen_k_rounded + - n_block_max * kBlockN) * - params.d_rounded; - const index_t row_offset_lse = - (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; - - // We assume that params.d == kHeadDim for now - Tensor gQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), - Shape, Int>{}, - make_stride(params.q_row_stride, _1{})); - Tensor gK = make_tensor( - make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), - Shape, Int>{}, - make_stride(params.k_row_stride, _1{})); - Tensor gV = make_tensor( - make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), - Shape, Int>{}, - make_stride(params.v_row_stride, _1{})); - Tensor gdO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdKaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gdVaccum = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + - row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gLSE = make_tensor( - make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + - row_offset_lse), - Shape>{}, Stride<_1>{}); - - Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sQt = - make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sQtNoSwizzle = make_tensor( - sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sdO = make_tensor(sQ.data() + size(sQ), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sdOt = make_tensor(sdO.data(), - typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sdOtransposedNoSwizzle = make_tensor( - sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sK = make_tensor(sdO.data() + size(sdO), - typename Kernel_traits::SmemLayoutKV{}); - // Double buffer for sK - Tensor sV = make_tensor(sK.data() + 2 * size(sK), - typename Kernel_traits::SmemLayoutKV{}); - Tensor sKt = - make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); - Tensor sKtNoSwizzle = make_tensor( - sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); - Tensor sdS = make_tensor(sV.data() + size(sV), - typename Kernel_traits::SmemLayoutPdS{}); - Tensor sdSt = make_tensor(sdS.data(), - typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sdStNoSwizzle = make_tensor( - sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sP = make_tensor(sdS.data() + size(sdS), - typename Kernel_traits::SmemLayoutPdS{}); - Tensor sPt = - make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sPtNoSwizzle = make_tensor( - sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sdPsum = make_tensor( - make_smem_ptr(reinterpret_cast(sdS.data().get())), - Shape>{}); - - typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; - auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - typename 
Kernel_traits::GmemTiledCopydQaccumAtomicAdd - gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); - - Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); - Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) - Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); - Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) - Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); - - typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; - auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); - Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) - Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) - Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) - Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) - - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); - Tensor tdKrdSt = - thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdKrQt = - thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) - Tensor tdVrPt = - thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdVrdO = thr_mma_dkv.partition_fragment_B( - sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); - Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) - Tensor tdQrKt = - thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) - - Tensor acc_dq = partition_fragment_C( - tiled_mma_dq, - Shape, Int>{}); // MMA, MMA_M_SdP, MMA_K - - // - // Copy Atom retiling - // - - auto smem_tiled_copy_QdO = - make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); - Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); - Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); - - auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN( - typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); - Tensor tSsK = smem_thr_copy_KV.partition_S(sK); - Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); - - // Partition sP and sdS to match the accumulator partitioning - // This has to be tiled_mma_sdp, not tiled_mma_dkv - auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN( - typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); - auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); - Tensor tPsP = - smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor tdSsdS = - smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - auto smem_tiled_copy_PdSt = make_tiled_copy_A( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); - Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); - Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); - - auto smem_tiled_copy_QdOt = 
make_tiled_copy_B( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); - Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); - Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); - - auto smem_tiled_copy_dS = - make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); - auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); - Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); - - auto smem_tiled_copy_Kt = make_tiled_copy_B( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); - auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); - Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); - - // - // PREDICATES - // - - // Construct identity layout for sQ and sK - Tensor cQ = make_identity_tensor( - make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor cKV = make_identity_tensor( - make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - // Repeat the partitioning with identity layouts - Tensor tQcQ = gmem_thr_copy_QKV.partition_S( - cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) - Tensor tKVcKV = gmem_thr_copy_QKV.partition_S( - cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) - - // Allocate predicate tensors for k - Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); - Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); - - // Set predicates for k bounds - if (!Is_even_K) { -#pragma unroll - for (int k = 0; k < size(tQpQ); ++k) { - tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; - } -#pragma unroll - for (int k = 0; k < size(tKVpKV); ++k) { - tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; - } - } - - // Prologue - - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - - // TODO: Might need to exit early and write 0 to gdQ. - - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - - Tensor tQrQ = make_fragment_like(tQgQ); - flash::copy( - gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - - int n_block = n_block_max - 1; - if (n_block % 2 == 1) { - tKsK.data() = tKsK.data() + size(sK); - tSsK.data() = tSsK.data() + size(sK); - tdQsKt.data() = tdQsKt.data() + size(sK); - } - - flash::copy( - gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - - Tensor caccS = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) - Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) - static_assert(decltype(size<0>(taccScS))::value == 4); - // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. - Tensor taccScS_row = - logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); - Tensor lse = make_tensor( - Shape>{}); -#pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - // Using uint32_t row makes it 10us slower on d=128, not sure why. - const int row = get<0>(taccScS_row(mi)); - lse(mi) = row < binfo.actual_seqlen_q - m_block * kBlockM ? 
gLSE(row) : 0; - } - - cute::cp_async_fence(); - - Tensor dP_sum = make_fragment_like(lse); - cute::copy(tdOrdO, tdOsdO); - dot_do_o( - tdOrdO, tdOrO, sdPsum, sdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), - params.p_dropout); - __syncthreads(); -#pragma unroll - for (int mi = 0; mi < size(dP_sum); ++mi) { - dP_sum(mi) = sdPsum(get<0>(taccScS_row(mi))); - } - - // auto seed = params.rng_state[0]; - // auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % - // 32; - - // deprecated: no rng support. - // unsigned long long seed = 0; - // unsigned long long offset = 0; - - unsigned long long seed = params.rng_state[0]; - unsigned long long offset = - params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; - - // if (block_id == 0 && tidx == 0) { - // printf("seed:%lu\n",seed); - // printf("offset:%lu\n",offset); - // } - - clear(acc_dq); - - for (; n_block >= 0; --n_block) { - Tensor acc_s = partition_fragment_C( - tiled_mma_sdp, - Shape, Int>{}); // (MMA=4, MMA_M_SdP, MMA_N) - clear(acc_s); - flash::cp_async_wait<0>(); - __syncthreads(); - - flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, - smem_thr_copy_KV); - - // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, - // MMA_N)) - Tensor scores = make_tensor( - acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - // We don't need to mask out the elements beyond actual_seqlen_k, because - // acc_s would be some finite value for those indices. In the end when we - // multiply with K to get dQ, the corresponding values of K would be 0, so - // the result would still be correct. - if (Is_causal && m_block * kBlockM < (n_block + 1) * kBlockN) { - flash::apply_mask_causal( - scores, - n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), - // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % - // AtomLayoutMS * 16 + (tidx % 32) / 4, - AtomLayoutMS * 16); - } - // Compute the exponential value. - flash::scale_apply_exp2(scores, lse, - params.scale_softmax_log2); - if (Is_dropout) { - uint32_t warp_id = tidx / 32; - uint32_t block_row_idx = - m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; - // Need col to be multiples of 32, since we're doing dropout with block of - // 16 x 32 - static_assert(MMA_N_SdP % 2 == 0); - uint32_t block_col_idx = - n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); - Tensor scores_dropped = make_tensor( - scores.data(), - flash::convert_layout_rowcol_Aregs( - scores.layout())); - flash::apply_dropout( - scores_dropped, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, AtomLayoutMS); - } - // Convert scores from fp32 to fp16/bf16 - Tensor rP = !Is_dropout ? flash::convert_type(scores) - : flash::convert_type_relu(scores); - // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, - // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using - // m16n8k8. 
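// Illustrative sanity check (not from the original kernel): the relayout
// below moves no registers, it only reinterprets the accumulator fragment so
// it can feed the next GEMM as an A operand. Under the m16n8k16 shape named
// in the comment above, the element counts agree for any even MMA_N:
constexpr int elems_rowcol(int mma_n) { return (2 * mma_n) * (2 * mma_n); }
constexpr int elems_aregs_m16n8k16(int mma_n) {
  return (2 * 2 * 2) * mma_n * (mma_n / 2);
}
static_assert(elems_rowcol(4) == elems_aregs_m16n8k16(4),
              "reinterpretation preserves the register element count");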
- Tensor tPrP = make_tensor( - rP.data(), - flash::convert_layout_rowcol_Aregs( - rP.layout())); - Tensor tPaP = - smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); - - Tensor acc_dp = partition_fragment_C( - tiled_mma_sdp, - Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA - - clear(acc_dp); - flash::gemm(acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, - smem_thr_copy_KV); - - // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, - // MMA_N)) - Tensor dS = make_tensor(acc_dp.data(), scores.layout()); - auto pointwise_mult = [](float p, float dp, float d) { - return p * (!Is_dropout || p >= 0 ? dp - d : d); - }; -#pragma unroll - for (int mi = 0; mi < size<0>(dS); ++mi) { -#pragma unroll - for (int ni = 0; ni < size<1>(dS); ++ni) { - dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); - } - } - - Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); - // Convert dS from fp32 to fp16 - Tensor tdSrdS = flash::convert_type(dS_reshaped); - Tensor tdSadS = - smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); - __syncthreads(); - - if (n_block > 0) { - // Double buffer for sK - const int sK_offset = n_block % 2 == 0 ? size(sK) : -size(sK); - tKsK.data() = tKsK.data() + sK_offset; - tSsK.data() = tSsK.data() + sK_offset; - // Advance gK, gV - tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); - tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tKgK, - tKsK, tKVcKV, tKVpKV); - flash::copy(gmem_tiled_copy_QKV, tVgV, - tVsV, tKVcKV, tKVpKV); - // This cp_async_fence needs to be in the if block, otherwise the - // synchronization isn't right and we get race conditions. - cute::cp_async_fence(); - } - - Tensor acc_dv = partition_fragment_C( - tiled_mma_dkv, - Shape, Int>{}); // MMA, MMA_N, MMA_K - clear(acc_dv); - flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, - smem_thr_copy_QdOt); - // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { - // print(acc_dv); } - tdVgdVaccum.data() = - tdVgdVaccum.data() + (-int(kBlockN * params.d_rounded)); -#pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { - atomicAdd(&tdVgdVaccum(i), acc_dv(i)); - } - - __syncthreads(); - Tensor acc_dk = partition_fragment_C( - tiled_mma_dkv, - Shape, Int>{}); // MMA, MMA_N, MMA_K - clear(acc_dk); - flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, - smem_thr_copy_QdOt); - tdKgdKaccum.data() = - tdKgdKaccum.data() + (-int(kBlockN * params.d_rounded)); -#pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { - atomicAdd(&tdKgdKaccum(i), acc_dk(i)); - } - - flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, - smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, - smem_thr_copy_Kt); - // Double buffer for sK - tdQsKt.data() = tdQsKt.data() + (n_block % 2 == 0 ? 
size(sK) : -size(sK)); - } - - // Epilogue - -#pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { - acc_dq(i) *= params.scale_softmax_rp_dropout; - } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - - Tensor sdQ = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutdQ{}); - - // Partition sdV and sdK to match the accumulator partitioning - auto smem_tiled_copy_dQ = - make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQrdQ = - smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdQsdQ = - smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - __syncthreads(); - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - - const index_t row_offset_dq = - binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + - m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; - Tensor gdQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); - - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor tdQsdQ = - gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); - - __syncthreads(); - - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - - Tensor cdQ = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); - Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); - if (!Is_even_K) { -#pragma unroll - for (int k = 0; k < size(tdQpdQ); ++k) { - tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; - } - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy(gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, - tdQpdQ, - binfo.actual_seqlen_q - m_block * kBlockM); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_dq_dk_dv(const Params ¶ms) { - - // The block index for the batch. - const int bidb = blockIdx.x; - // const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.y; - // const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - const int n_block_max = - (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - if (n_block_max == 1) { - compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); - } else { - // Iterating backward from n_block_max - 1 to 0 might save 1 register - compute_dq_dk_dv_1colblock(params, bidb, bidh, - n_block_max - 1); - for (int n_block = n_block_max - 2; n_block > 0; n_block--) { - compute_dq_dk_dv_1colblock( - params, bidb, bidh, n_block); - } - compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_dq_dk_dv_seqk_parallel(const Params ¶ms) { - - const int n_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. 
- const int bidh = blockIdx.z; - - compute_dq_dk_dv_1colblock( - params, bidb, bidh, n_block); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_dq_dk_dv_seqq_parallel(const Params ¶ms) { - - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - - compute_dq_dk_dv_1rowblock(params, bidb, bidh, m_block); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace flash -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_launch_template.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_launch_template.h deleted file mode 100644 index 3cb465f18..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_bwd_launch_template.h +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -#pragma once - -// #include - -#include "flash.h" -#include "flash_bwd_kernel.h" -#include "static_switch.h" - -namespace brt { -namespace cuda { -namespace kernel { -template -__global__ void flash_bwd_dot_do_o_kernel(Flash_bwd_params params) { - flash::compute_dot_do_o(params); -} - -template -__global__ void flash_bwd_clear_dkvaccum_kernel(Flash_bwd_params params) { - flash::clear_dKVaccum(params); -} - -template -__global__ void flash_bwd_dq_dk_dv_loop_kernel(Flash_bwd_params params) { - flash::compute_dq_dk_dv(params); -} - -template -__global__ void -flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel(Flash_bwd_params params) { - flash::compute_dq_dk_dv_seqk_parallel(params); -} - -template -__global__ void -flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel(Flash_bwd_params params) { - flash::compute_dq_dk_dv_seqq_parallel(params); -} - -template -__global__ void flash_bwd_convert_dq_kernel(Flash_bwd_params params) { - flash::convert_dQ(params); -} - -template -__global__ void flash_bwd_convert_dkv_kernel(Flash_bwd_params params) { - flash::convert_dKV(params); -} - -template -void run_flash_bwd_seqk_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - const int num_m_block = - (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid_m(num_m_block, params.b, params.h); - const int num_n_block = - (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - dim3 grid_n(num_n_block, params.b, params.h); - - flash_bwd_dot_do_o_kernel - <<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - - // We want to specialize to is_even_MN and not just is_even_M, since in the - // case where N is not a multiple of kBlockN, we'll need to apply mask in the - // loop. 
- const bool is_even_MN = params.cu_seqlens_q == nullptr && - params.cu_seqlens_k == nullptr && - params.seqlen_q % Kernel_traits::kBlockM == 0 && - params.seqlen_k % Kernel_traits::kBlockN == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1colblock; - // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); - BOOL_SWITCH(params.is_causal, IsCausalConst, [&] { - BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel< - Kernel_traits, Is_dropout, IsCausalConst, IsEvenMNConst, - IsEvenKConst>; - // auto kernel = - // &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; - if (smem_size_dq_dk_dv >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, - // smem_size_dq_dk_dv)); - cudaFuncSetAttribute(kernel, - cudaFuncAttributeMaxDynamicSharedMemorySize, - smem_size_dq_dk_dv); - } - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); - }); - - auto kernel_dq = &flash_bwd_convert_dq_kernel; - if (Kernel_traits::kSmemdQSize >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, - // Kernel_traits::kSmemdQSize)); - cudaFuncSetAttribute(kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, - Kernel_traits::kSmemdQSize); - } - kernel_dq<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); -} - -template -void run_flash_bwd_seqq_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - const int num_n_block = - (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - dim3 grid_n(num_n_block, params.b, params.h_k); - flash_bwd_clear_dkvaccum_kernel - <<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - - const int num_m_block = - (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid_m(num_m_block, params.b, params.h); - // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we - // need to check for cu_seqlens_k as well. 
- const bool is_even_N = params.cu_seqlens_q == nullptr && - params.cu_seqlens_k == nullptr && - params.seqlen_k % Kernel_traits::kBlockN == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1rowblock; - // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); - BOOL_SWITCH(params.is_causal, IsCausalConst, [&] { - BOOL_SWITCH(is_even_N, IsEvenNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel< - Kernel_traits, Is_dropout, IsCausalConst, IsEvenNConst, - IsEvenKConst>; - // auto kernel = - // &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; - if (smem_size_dq_dk_dv >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, - // smem_size_dq_dk_dv)); - cudaFuncSetAttribute(kernel, - cudaFuncAttributeMaxDynamicSharedMemorySize, - smem_size_dq_dk_dv); - } - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); - }); - - auto kernel_dkv = &flash_bwd_convert_dkv_kernel; - if (Kernel_traits::kSmemKVSize >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, - // Kernel_traits::kSmemKVSize)); - cudaFuncSetAttribute(kernel_dkv, - cudaFuncAttributeMaxDynamicSharedMemorySize, - Kernel_traits::kSmemKVSize); - } - kernel_dkv<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); -} -// - -template -void run_flash_bwd(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - if (configure) - return; - // dim3 grid(params.b, params.h); - // const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / - // Kernel_traits::kBlockM; dim3 grid_m(num_m_block, params.b, params.h); - - // if (params.h == params.h_k) { // No multi-query or grouped-query attention - // (MQA/GQA) - run_flash_bwd_seqk_parallel(params, stream, - configure); - // } else { - // run_flash_bwd_seqq_parallel(params, stream, - // configure); - // } - - // // We also use is_even_M to set Unpadded in the BlockInfo constructor, so - // we need to check - // // for cu_seqlens_q as well. 
- // const bool is_even_M = params.cu_seqlens_q == nullptr && - // params.cu_seqlens_k == nullptr && params.seqlen_q % Kernel_traits::kBlockM - // == 0; const bool is_even_K = params.d == Kernel_traits::kHeadDim; constexpr - // int smem_size_dq_dk_dv = Kernel_traits::kSmemSize; - // BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // BOOL_SWITCH(params.is_causal, IsCausalConst, [&] { - // BOOL_SWITCH(is_even_M, IsEvenMConst, [&] { - // BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - // // auto kernel = - // &flash_bwd_dq_dk_dv_loop_kernel; auto kernel = - // &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; if - // (smem_size_dq_dk_dv >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, - // cudaFuncAttributeMaxDynamicSharedMemorySize, - // smem_size_dq_dk_dv)); - // } - // kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - // }); - // }); - // }); - // }); - - // auto kernel_dq = &flash_bwd_convert_dq_kernel; - // if (Kernel_traits::kSmemdQSize >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, - // Kernel_traits::kSmemdQSize)); - // } - // kernel_dq<<>>(params); C10_CUDA_KERNEL_LAUNCH_CHECK(); -} -// - -template -void run_mha_bwd_hdim32(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 32; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= - 2 * ((3 * 128 + 2 * 128) * Headdim + 2 * 128 * 128)) { // 104 KB - if constexpr (!Is_dropout) { // We can afford more registers to keep V in - // registers - run_flash_bwd, - Is_dropout>(params, stream, configure); - } else { - run_flash_bwd, - Is_dropout>(params, stream, configure); - } - } else { // 96 KB - run_flash_bwd, - Is_dropout>(params, stream, configure); - } - }); -} - -template -void run_mha_bwd_hdim64(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 64; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // Changing AtomLayoutMdQ from 2 to 4 takes the same time - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); This is - // slightly faster. We want to split M more so we need fewer registers to - // store LSE. 
- if (max_smem_per_block >= 144 * 1024) { - run_flash_bwd, - Is_dropout>(params, stream, configure); - // This has a lot of register spilling - // run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - // if (params.h == params.h_k) { - // run_flash_bwd, Is_dropout>(params, stream, configure); - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // } else { - // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, - // configure); - // } - } - }); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); M=128, N=64 is quite slow, I think - // because we need to read/write dQaccum twice as many times - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - - // run_flash_bwd>(params, stream, configure); -} - -template -void run_mha_bwd_hdim96(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 96; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // if (params.h == params.h_k) { - if (max_smem_per_block >= 116 * 1024) { - if constexpr (!Is_dropout) { // 92KB - run_flash_bwd, - Is_dropout>(params, stream, configure); - } else { // 116 KB - // This is faster for dropout since we don't have many registers to - // spare - run_flash_bwd, - Is_dropout>(params, stream, configure); - } - } else { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } - // } else { - // run_flash_bwd_seqq_parallel>(params, stream, configure); - // } - }); -} - -template -void run_mha_bwd_hdim128(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 128; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // if (params.h == params.h_k) { - // run_flash_bwd>(params, stream, configure); This is faster, in the case - // of sequence-parallel bwd (where we need fewer registers). Out of these - // three, the 2nd one is slightly faster (2% faster than the first). Idk - // why. 
run_flash_bwd>(params, stream, configure); - if (max_smem_per_block >= 144 * 1024) { - run_flash_bwd, - Is_dropout>(params, stream, configure); - // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - // run_flash_bwd, Is_dropout>(params, stream, configure); - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } - // run_flash_bwd>(params, stream, configure); - - // run_flash_bwd>(params, stream, configure); - // } else { - // run_flash_bwd_seqq_parallel>(params, stream, configure); - // } - }); -} - -template -void run_mha_bwd_hdim160(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 160; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 116 * 1024) { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } else { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } - }); -} - -template -void run_mha_bwd_hdim192(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 192; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 136 * 1024) { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } else { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } - }); -} - -template -void run_mha_bwd_hdim224(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 224; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - }); -} - -template -void run_mha_bwd_hdim256(Flash_bwd_params ¶ms, cudaStream_t stream, - const bool configure) { - constexpr int Headdim = 256; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 176 * 1024) { // H100 - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } else { // A100, we don't do double buffering to save smem - run_flash_bwd< - Flash_bwd_kernel_traits, - Is_dropout>(params, stream, configure); - } - }); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_bf16_sm80.cu deleted file mode 100644 index 09f30d69b..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_bf16_sm80.cu +++ 
/dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::bfloat16_t; -// if (params.p_dropout == 1.f) { -// run_flash_fwd, false>(params, stream); -// } else { -// run_flash_fwd, true>(params, stream); -// } -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim128(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_fp16_sm80.cu deleted file mode 100644 index 15ae93d1c..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim128_fp16_sm80.cu +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::half_t; -// if (params.p_dropout == 1.f) { -// // Using 8 warps (128 x 128 and 256 x 64) is 28% slower for seqlen=2k -// run_flash_fwd, false>(params, stream); -// // run_flash_fwd, false>(params, stream); -// // run_flash_fwd, false>(params, stream); -// // run_flash_fwd, false>(params, stream); -// run_flash_fwd, false>(params, stream); -// run_flash_fwd, false>(params, stream); -// run_flash_fwd, false>(params, stream); -// // 1st ones are good for H100, A100 -// // 2nd one is good for A6000 bc we get slightly better occupancy -// } else { -// run_flash_fwd, true>(params, stream); -// run_flash_fwd, true>(params, stream); -// run_flash_fwd, true>(params, stream); -// // 1st one is good for H100, A100, A6000 -// } -// } - -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim128(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_bf16_sm80.cu deleted file mode 100644 index db7e114b0..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_bf16_sm80.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::bfloat16_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim160(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_fp16_sm80.cu deleted file mode 100644 index 1f9576326..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim160_fp16_sm80.cu +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::half_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // For A6000, no-causal, 1st is fastest. causal, 4th is fastest. -// // For A100, H100, 1st is fastest. -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim160(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_bf16_sm80.cu deleted file mode 100644 index 3e800ff2d..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_bf16_sm80.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::bfloat16_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim192(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_fp16_sm80.cu deleted file mode 100644 index 025a4c57b..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim192_fp16_sm80.cu +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::half_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// // This one is slightly faster for causal? -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// }); -// // For A100 H100, 1st is faster with dropout, 3rd is faster without -// dropout -// // For A6000, 1st is faster when causal, 3rd is faster when not causal -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim192(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_bf16_sm80.cu deleted file mode 100644 index f2d197404..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_bf16_sm80.cu +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim224(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_fp16_sm80.cu deleted file mode 100644 index cb6c69836..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim224_fp16_sm80.cu +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim224(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_bf16_sm80.cu deleted file mode 100644 index 56e936794..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_bf16_sm80.cu +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim256(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_fp16_sm80.cu deleted file mode 100644 index 8e74bf3cd..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim256_fp16_sm80.cu +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim256(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_bf16_sm80.cu deleted file mode 100644 index e018a58c9..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_bf16_sm80.cu +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim32(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_fp16_sm80.cu deleted file mode 100644 index c4a76b088..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim32_fp16_sm80.cu +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, cudaStream_t -// stream) { -// using elem_type = cutlass::half_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// // For dropout there might be a lot of register spilling? -// // These two are very slow due to register spilling -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// // This one is slightly slower -// // run_flash_fwd>(params, stream); -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim32(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_bf16_sm80.cu deleted file mode 100644 index 8cacf18c2..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_bf16_sm80.cu +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::bfloat16_t; -// if (params.p_dropout == 1.f) { -// run_flash_fwd, false>(params, stream); -// } else { -// run_flash_fwd, true>(params, stream); -// } -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim64(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_fp16_sm80.cu deleted file mode 100644 index d2566f8be..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim64_fp16_sm80.cu +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, cudaStream_t -// stream) { -// using elem_type = cutlass::half_t; -// if (params.p_dropout == 1.f) { -// // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower -// // Using block size (64 x 256) is 27% slower for seqlen=2k -// // Using block size (256 x 64) is 85% slower for seqlen=2k, because -// of register spilling run_flash_fwd, false>(params, stream); -// run_flash_fwd, false>(params, stream); -// run_flash_fwd, false>(params, stream); -// } else { -// run_flash_fwd, true>(params, stream); -// run_flash_fwd, true>(params, stream); -// run_flash_fwd, true>(params, stream); -// } -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim64(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_bf16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_bf16_sm80.cu deleted file mode 100644 index a9471aef3..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_bf16_sm80.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. - -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, -// cudaStream_t stream) { -// using elem_type = cutlass::bfloat16_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim96(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_fp16_sm80.cu b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_fp16_sm80.cu deleted file mode 100644 index ab07eaf04..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_hdim96_fp16_sm80.cu +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2023, Tri Dao. - -// Splitting the different head dimensions to different files to speed up -// compilation. 
- -#include "flash_fwd_launch_template.h" -namespace brt { -namespace cuda { -namespace kernel { -// template<> -// void run_mha_fwd_(Flash_fwd_params ¶ms, cudaStream_t -// stream) { -// using elem_type = cutlass::half_t; -// BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// // This 3rd one is good for H100, and A100, A6000 -// run_flash_fwd, Is_dropout>(params, stream); -// run_flash_fwd, Is_dropout>(params, stream); -// // These two are always slower -// // run_flash_fwd>(params, stream); -// // run_flash_fwd>(params, stream); -// }); -// } -template <> -void run_mha_fwd_(Flash_fwd_params ¶ms, - cudaStream_t stream) { - run_mha_fwd_hdim96(params, stream); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_kernel.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_kernel.h deleted file mode 100644 index d89242b36..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_kernel.h +++ /dev/null @@ -1,732 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. - ******************************************************************************/ - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "block_info.h" -#include "kernel_traits.h" -#include "philox.cuh" -#include "softmax.h" -#include "utils.h" -namespace brt { -namespace cuda { -namespace kernel { -namespace flash { - -using namespace cute; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -CUTE_HOST_DEVICE auto -make_tiled_copy_A_warpcontiguousM(Copy_Atom const ©_atom, - TiledMMA const &tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_M = decltype(size<0>(AtomShape_MNK{}))::value; - constexpr int kNWarps = - decltype(size<0>(TileShape_MNK{}))::value / AtomShape_M; - constexpr int MMAStride_M = MMA_M * AtomShape_M; - auto t = make_tile(Layout, Int>, - Stride<_1, Int>>{}, - make_layout(size<2>(TileShape_MNK{}))); - // if (cute::thread0()) {printf("make_tiled_copy_A_warpcontiguousM "); - // print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutA_TV(), t); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -CUTE_HOST_DEVICE auto -make_tiled_copy_C_warpcontiguousM(Copy_Atom const ©_atom, - TiledMMA const &tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_M = decltype(size<0>(AtomShape_MNK{}))::value; - constexpr int kNWarps = - decltype(size<0>(TileShape_MNK{}))::value / AtomShape_M; - constexpr int MMAStride_M = MMA_M * AtomShape_M; - auto t = make_tile(Layout, Int>, - Stride<_1, Int>>{}, - // TODO: Shouldn't this be size<1>? 
- make_layout(size<2>(TileShape_MNK{}))); - // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousM "); - // print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void softmax_rescale_o(Tensor0 &scores, Tensor1 &scores_max, - Tensor1 &scores_sum, Tensor2 &acc_o, - float softmax_scale_log2) { - if (Is_first) { - flash::template reduce_max(scores, scores_max); - flash::scale_apply_exp2(scores, scores_max, softmax_scale_log2); - flash::reduce_sum(scores, scores_sum); - } else { - Tensor scores_max_prev = make_fragment_like(scores_max); - cute::copy(scores_max, scores_max_prev); - flash::template reduce_max(scores, scores_max); - // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, - // MMA_K)) - Tensor acc_o_rowcol = make_tensor( - acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout())); -#pragma unroll - for (int mi = 0; mi < size(scores_max); ++mi) { - float scores_max_cur = - !Check_inf ? scores_max(mi) - : (scores_max(mi) == -INFINITY ? 0.0f : scores_max(mi)); - float scores_scale = - exp2f((scores_max_prev(mi) - scores_max_cur) * softmax_scale_log2); - scores_sum(mi) *= scores_scale; -#pragma unroll - for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { - acc_o_rowcol(mi, ni) *= scores_scale; - } - } - flash::scale_apply_exp2(scores, scores_max, softmax_scale_log2); - Tensor scores_sum_cur = make_fragment_like(scores_sum); - flash::reduce_sum(scores, scores_sum_cur); -#pragma unroll - for (int mi = 0; mi < size(scores_sum); ++mi) { - scores_sum(mi) += scores_sum_cur(mi); - } - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -write_softmax_to_gmem(Tensor const &tOrP, - Tensor &tPgP, - TiledCopy gmem_tiled_copy_P) { - // Reshape tOrP from (8, MMA_M, MMA_N) to (8, MMA_M * MMA_N) - Layout l = tOrP.layout(); - Tensor tPrP = make_tensor( - tOrP.data(), make_layout(get<0>(l), make_layout(get<1>(l), get<2>(l)))); - CUTE_STATIC_ASSERT_V(size<2>(tPgP) == _1{}); -// CUTE_STATIC_ASSERT_V(size<1>(tPrP) == size<1>(tPgP)); -#pragma unroll - for (int mi = 0; mi < size<1>(tPrP); ++mi) { - cute::copy(gmem_tiled_copy_P, tPrP(_, mi), tPgP(_, mi, 0)); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_attn_1rowblock(const Params ¶ms, - const int bidb, const int bidh, - const int m_block) { - - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - // The thread index. - const int tidx = threadIdx.x; - // The global block index. 
- // const int block_id = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * - // gridDim.y * blockIdx.z; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - constexpr int kNWarps = Kernel_traits::kNWarps; - constexpr int MMA_M = - kBlockM / decltype(size<0>( - typename Kernel_traits::TiledMma::TiledShape_MNK{}))::value; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) - return; - - int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); - if (Is_causal) { - n_block_max = - std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM, kBlockN)); - // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { - // printf("m_block = %d, n_block_max = %d\n", m_block, n_block_max); - // } - } - - // We iterate over the blocks in reverse order. This is because the last block - // is the only one that needs masking when we read K and V from global memory. - // Moreover, iterating in reverse might save us 1 register (we just need - // n_block instead of both n_block and n_block_max). - - const index_t row_offset_q = - binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + - m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; - // We move K and V to the last block. - const index_t row_offset_k = - binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + - (n_block_max - 1) * kBlockN * params.k_row_stride + - (bidh / params.h_h_k_ratio) * params.k_head_stride; - const index_t row_offset_v = - binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + - (n_block_max - 1) * kBlockN * params.v_row_stride + - (bidh / params.h_h_k_ratio) * params.v_head_stride; - const index_t row_offset_p = - ((bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM) * - params.seqlen_k_rounded + - (n_block_max - 1) * kBlockN; - - Tensor gQ = make_tensor( - make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), - Shape, Int>{}, - make_stride(params.q_row_stride, _1{})); - Tensor gK = make_tensor( - make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), - Shape, Int>{}, - make_stride(params.k_row_stride, _1{})); - Tensor gV = make_tensor( - make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), - Shape, Int>{}, - make_stride(params.v_row_stride, _1{})); - Tensor gP = make_tensor( - make_gmem_ptr(reinterpret_cast(params.p_ptr) + row_offset_p), - Shape, Int>{}, - make_stride(params.seqlen_k_rounded, _1{})); - - Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutQ{}); - // Careful we're using the same smem for sQ and sK | sV if Share_Q_K_smem; - Tensor sK = - make_tensor(sQ.data() + (Kernel_traits::Share_Q_K_smem ? 
0 : size(sQ)), - typename Kernel_traits::SmemLayoutKV{}); - Tensor sV = - make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); - Tensor sVt = - make_tensor(sV.data(), typename Kernel_traits::SmemLayoutVtransposed{}); - Tensor sVtNoSwizzle = make_tensor( - sV.data(), typename Kernel_traits::SmemLayoutVtransposedNoSwizzle{}); - - typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; - auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopyP gmem_tiled_copy_P; - auto gmem_thr_copy_P = gmem_tiled_copy_P.get_thread_slice(tidx); - - Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); - Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); - Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) - Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); - Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) - Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); - Tensor tPgP = gmem_thr_copy_P.partition_D(gP); - - typename Kernel_traits::TiledMma tiled_mma; - auto thr_mma = tiled_mma.get_thread_slice(tidx); - Tensor tSrQ = thr_mma.partition_fragment_A(sQ); // (MMA,MMA_M,MMA_K) - Tensor tSrK = thr_mma.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) - Tensor tOrVt = - thr_mma.partition_fragment_B(sVtNoSwizzle); // (MMA, MMA_K,MMA_N) - - Tensor acc_o = partition_fragment_C( - tiled_mma, Shape, Int>{}); // MMA, MMA_M, MMA_K - - // - // Copy Atom retiling - // - - auto smem_tiled_copy_Q = - make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); - auto smem_thr_copy_Q = smem_tiled_copy_Q.get_thread_slice(tidx); - // auto smem_thr_copy_Q = make_tiled_copy_A_warpcontiguousM(typename - // Kernel_traits::SmemCopyAtom{}, tiled_mma).get_thread_slice(tidx); if - // (cute::thread0()) {smem_thr_copy_Q.print_all();} - Tensor tSsQ = smem_thr_copy_Q.partition_S(sQ); - // if (cute::thread0()) {print(tSsQ.layout()); printf("\n");} - - auto smem_tiled_copy_K = - make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma); - auto smem_thr_copy_K = smem_tiled_copy_K.get_thread_slice(tidx); - Tensor tSsK = smem_thr_copy_K.partition_S(sK); - - auto smem_tiled_copy_V = make_tiled_copy_B( - typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma); - auto smem_thr_copy_V = smem_tiled_copy_V.get_thread_slice(tidx); - Tensor tOsVt = smem_thr_copy_V.partition_S(sVt); - - // TODO: this might need to change if we change the mma instruction in SM70 - Tensor scores_max = - make_tensor(Shape(acc_o)>>{}); - Tensor scores_sum = make_fragment_like(scores_max); - - // - // PREDICATES - // - - // // Allocate predicate tensors for m and n - // Tensor tQpQ = make_tensor(make_shape(size<1>(tQsQ), size<2>(tQsQ)), - // Stride<_1,_0>{}); Tensor tKVpKV = - // make_tensor(make_shape(size<1>(tKsK), size<2>(tKsK)), - // Stride<_1,_0>{}); - - // Construct identity layout for sQ and sK - Tensor cQ = make_identity_tensor( - make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor cKV = make_identity_tensor( - make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - // Tensor tScQ = thr_mma.partition_A(cQ); // - // (MMA,MMA_M,MMA_K) if (cute::thread0()) { - // print(tScQ.layout()); printf("\n"); - // for (int i = 0; i < size(tScQ); ++i) { - // printf("%d ", get<0>(tScQ(i))); - // } - // printf("\n"); - // for (int i = 0; i < size(tScQ); ++i) { - // printf("%d ", get<1>(tScQ(i))); - // } - // printf("\n"); - // } - - // Repeat the partitioning with identity layouts - Tensor tQcQ = 
gmem_thr_copy_QKV.partition_S( - cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) - Tensor tKVcKV = gmem_thr_copy_QKV.partition_S( - cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) - - // Allocate predicate tensors for k - Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); - Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); - - // Set predicates for k bounds - if (!Is_even_K) { -#pragma unroll - for (int k = 0; k < size(tQpQ); ++k) { - tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; - } -#pragma unroll - for (int k = 0; k < size(tKVpKV); ++k) { - tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; - } - } - - // Prologue - - Tensor tQrQ = make_fragment_like(tQgQ); - // We don't need to clear the sQ smem tiles since we'll only write out the - // valid outputs - flash::copy( - gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, - binfo.actual_seqlen_q - m_block * kBlockM); - if (Kernel_traits::Is_Q_in_regs) { - cute::cp_async_fence(); - } - - // // Copy rmem to smem - // // copy(tQrQ, tQsQ); - // flash::cp_async_wait<0>(); - // __syncthreads(); - // // if (cute::thread(1, 0)) { print(tQsQ); } - // // Tensor sQNoSwizzle = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), typename Kernel_traits::SmemLayoutQNoSwizzle{}); - // // if (cute::thread0()) { print(sQNoSwizzle); } - - if (Kernel_traits::Share_Q_K_smem) { - flash::cp_async_wait<0>(); - __syncthreads(); - Tensor tSrQ_copy_view = smem_thr_copy_Q.retile_D(tSrQ); - CUTE_STATIC_ASSERT_V(size<1>(tSsQ) == size<1>(tSrQ_copy_view)); // M - cute::copy(smem_tiled_copy_Q, tSsQ, tSrQ_copy_view); - __syncthreads(); - } - - int n_block = n_block_max - 1; - // We don't need to clear the sK smem tiles since we'll mask out the scores - // anyway. - flash::copy(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, - tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - cute::cp_async_fence(); - // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z < 2) { print(tKgK); } - // __syncthreads(); - - if (Kernel_traits::Is_Q_in_regs && !Kernel_traits::Share_Q_K_smem) { - flash::cp_async_wait<1>(); - __syncthreads(); - Tensor tSrQ_copy_view = smem_thr_copy_Q.retile_D(tSrQ); - CUTE_STATIC_ASSERT_V(size<1>(tSsQ) == size<1>(tSrQ_copy_view)); // M - cute::copy(smem_tiled_copy_Q, tSsQ, tSrQ_copy_view); - } - - // auto seeds = at::cuda::philox::unpack(params.philox_args); - // unsigned long long seed = std::get<0>(seeds); - // unsigned long long offset = std::get<1>(seeds) + (bidb * params.h + bidh) * - // 32 + tidx % 32; - - // deprecated: no rng support. - // unsigned long long seed = 0; - // unsigned long long offset = 0; - - unsigned long long seed = params.rng_state[0]; - unsigned long long offset = - params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; - - // if (block_id == 0 && tidx == 0) { - // printf("seed:%lu\n",seed); - // printf("offset:%lu\n",offset); - // } - - // Save seed and offset for backward. - // if (block_id == 0 && tidx == 0) { - // params.rng_state[0] = seed; - // params.rng_state[1] = std::get<1>(seeds); - // } - - clear(acc_o); - - // For performance reason, we separate out two kinds of iterations: - // those that need masking on S, and those that don't. - // We need masking on S for the very last block when K and V has length not - // multiple of kBlockN. We also need masking on S if it's causal, for the last - // ceil_div(kBlockM, kBlockN) blocks. We will have at least 1 "masking" - // iteration. - - constexpr int n_masking_steps = - Is_causal ? 
cute::ceil_div(kBlockM, kBlockN) : 1; -#pragma unroll - for (int masking_step = 0; masking_step < n_masking_steps; - ++masking_step, --n_block) { - Tensor acc_s = partition_fragment_C( - tiled_mma, - Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) - clear(acc_s); - flash::cp_async_wait<0>(); - __syncthreads(); - - // Advance gV - if (masking_step > 0) { - tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tVgV, - tVsV, tKVcKV, tKVpKV); - } else { - // Clear the smem tiles to account for predicated off loads - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, - binfo.actual_seqlen_k - n_block * kBlockN); - } - cute::cp_async_fence(); - - flash::gemm( - acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, - smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); - // if (cute::thread0()) { print(acc_s); } - - // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, - // MMA_N)) - Tensor scores = make_tensor( - acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - // if (cute::thread0()) { print(scores); } - // We don't put the masking before the matmul S = Q K^T because we don't - // clear sK for rows outside actual_seqlen_k. So those rows could have Inf / - // NaN, and the matmul can produce Inf / NaN. - if (!Is_causal) { - if (!Is_even_N) { - flash::apply_mask(scores, binfo.actual_seqlen_k - n_block * kBlockN); - } - } else { - // Tensor caccS = make_identity_tensor(Shape, - // Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) Tensor taccScS = - // thr_mma.partition_C(caccS); // - // (MMA,MMA_M,MMA_N) static_assert(decltype(size<0>(taccScS))::value == - // 4); - // // Convert to ((2, 2), MMA_M, MMA_N) then take only the row indices. - // Tensor idx_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), - // _, 0); Tensor idx_rowcol = make_tensor(taccScS.data(), - // flash::convert_layout_acc_rowcol(taccScS.layout())); - // flash::apply_mask_causal_w_idx(scores, idx_rowcol, n_block * kBlockN, - // binfo.actual_seqlen_k, - // m_block * kBlockM); - // Idk why it's get<1> and not get<0> of the stride. - // if (cute::thread0()) { print(idx_row.layout()); - // print(stride<1>(idx_row)); printf("stride = %d \n", - // get<1>(stride<1>(idx_row))); } I can't get the stride from idx_row - flash::apply_mask_causal( - scores, n_block * kBlockN, binfo.actual_seqlen_k, - // m_block * kBlockM + get<0>(idx_row(0)), - m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4, kNWarps * 16); - // m_block * kBlockM + (tidx / 32) * 16, kNWarps * 16); - // m_block * kBlockM + (tidx / 32) * (kBlockM / kNWarps), 16); - } - - flash::cp_async_wait<0>(); - __syncthreads(); - if (n_block > 0) { - // Advance gK - tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tKgK, - tKsK, tKVcKV, tKVpKV); - // This cp_async_fence needs to be in the if block, otherwise the - // synchronization isn't right and we get race conditions. - cute::cp_async_fence(); - } - - // TODO: when we have key_padding_mask we'll need to Check_inf - masking_step == 0 - ? softmax_rescale_o( - scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2) - : softmax_rescale_o( - scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2); - - // Convert scores from fp32 to fp16/bf16 - Tensor rP = flash::convert_type(scores); - // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, - // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using - // m16n8k8. 
- Tensor tOrP = make_tensor( - rP.data(), flash::convert_layout_rowcol_Aregs( - rP.layout())); - uint32_t block_row_idx = m_block * (kBlockM / 16) + tidx / 32; - uint32_t block_col_idx = n_block * (kBlockN / 32); - if (Return_softmax) { - Tensor tOrP_copy = make_fragment_like(tOrP); - cute::copy(tOrP, tOrP_copy); - flash::apply_dropout( - tOrP_copy, params.p_dropout_in_uint8_t, seed, offset, block_row_idx, - block_col_idx, kNWarps); - flash::write_softmax_to_gmem(tOrP_copy, tPgP, gmem_tiled_copy_P); - tPgP.data() = tPgP.data() + (-kBlockN); - } - if (Is_dropout) { - flash::apply_dropout(tOrP, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, kNWarps); - } - // if (cute::thread0()) { print(tOrP); } - - flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, - smem_tiled_copy_V, smem_thr_copy_V); - // if (cute::thread0()) { print(scores); } - - // This check is at the end of the loop since we always have at least 1 - // iteration - if (n_masking_steps > 1 && n_block <= 0) { - --n_block; - break; - } - } - - // These are the iterations where we don't need masking on S - for (; n_block >= 0; --n_block) { - Tensor acc_s = partition_fragment_C( - tiled_mma, - Shape, Int>{}); // (MMA=4, MMA_M, MMA_N) - clear(acc_s); - flash::cp_async_wait<0>(); - __syncthreads(); - // Advance gV - tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tVgV, tVsV, - tKVcKV, tKVpKV); - cute::cp_async_fence(); - - flash::gemm( - acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, - smem_tiled_copy_K, smem_thr_copy_Q, smem_thr_copy_K); - - flash::cp_async_wait<0>(); - __syncthreads(); - if (n_block > 0) { - // Advance gK - tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tKgK, - tKsK, tKVcKV, tKVpKV); - // This cp_async_fence needs to be in the if block, otherwise the - // synchronization isn't right and we get race conditions. - cute::cp_async_fence(); - } - - // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, - // MMA_N)) - Tensor scores = make_tensor( - acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - softmax_rescale_o(scores, scores_max, scores_sum, acc_o, - params.scale_softmax_log2); - - Tensor rP = flash::convert_type(scores); - // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, - // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using - // m16n8k8. 
- Tensor tOrP = make_tensor( - rP.data(), flash::convert_layout_rowcol_Aregs( - rP.layout())); - uint32_t block_row_idx = m_block * (kBlockM / 16) + tidx / 32; - uint32_t block_col_idx = n_block * (kBlockN / 32); - if (Return_softmax) { - Tensor tOrP_copy = make_fragment_like(tOrP); - cute::copy(tOrP, tOrP_copy); - flash::apply_dropout( - tOrP_copy, params.p_dropout_in_uint8_t, seed, offset, block_row_idx, - block_col_idx, kNWarps); - flash::write_softmax_to_gmem(tOrP_copy, tPgP, gmem_tiled_copy_P); - tPgP.data() = tPgP.data() + (-kBlockN); - } - if (Is_dropout) { - flash::apply_dropout(tOrP, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, kNWarps); - } - - flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, - smem_tiled_copy_V, smem_thr_copy_V); - } - - // Epilogue - - // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, - // MMA_K)) - Tensor acc_o_rowcol = make_tensor( - acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout())); - Tensor lse = make_fragment_like(scores_sum); -#pragma unroll - for (int mi = 0; mi < size<0>(acc_o_rowcol); ++mi) { - float sum = scores_sum(mi); - float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum; - lse(mi) = (sum == 0.f || sum != sum) - ? INFINITY - : scores_max(mi) * params.scale_softmax + __logf(sum); - float scale = !Is_dropout ? inv_sum : inv_sum * params.rp_dropout; -#pragma unroll - for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { - acc_o_rowcol(mi, ni) *= scale; - } - } - - // if (cute::thread0()) { print(acc_o_rowcol); } - - // Convert acc_o from fp32 to fp16/bf16 - Tensor rO = flash::convert_type(acc_o); - Tensor sO = make_tensor( - sQ.data(), typename Kernel_traits::SmemLayoutO{}); // (SMEM_M,SMEM_N) - // Partition sO to match the accumulator partitioning - auto smem_tiled_copy_O = - make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomO{}, tiled_mma); - auto smem_thr_copy_O = smem_tiled_copy_O.get_thread_slice(tidx); - // auto smem_thr_copy_O = make_tiled_copy_C_warpcontiguousM(typename - // Kernel_traits::SmemCopyAtomO{}, tiled_mma).get_thread_slice(tidx); - Tensor taccOrO = - smem_thr_copy_O.retile_S(rO); // ((Atom,AtomNum), MMA_M, MMA_N) - Tensor taccOsO = - smem_thr_copy_O.partition_D(sO); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - // sO has the same size as sQ, so we don't need to sync here. 
- if (Kernel_traits::Share_Q_K_smem) { - __syncthreads(); - } - - cute::copy(smem_tiled_copy_O, taccOrO, taccOsO); - - const index_t row_offset_o = - binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + - m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - const index_t row_offset_lse = - (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; - Tensor gO = make_tensor( - make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gLSE = make_tensor( - make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + - row_offset_lse), - Shape>{}, Stride<_1>{}); - - typename Kernel_traits::GmemTiledCopyO gmem_tiled_copy_O; - auto gmem_thr_copy_O = gmem_tiled_copy_O.get_thread_slice(tidx); - Tensor tOsO = - gmem_thr_copy_O.partition_S(sO); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tOgO = gmem_thr_copy_O.partition_D(gO); - - __syncthreads(); - - Tensor tOrO = make_tensor(shape(tOgO)); - cute::copy(gmem_tiled_copy_O, tOsO, tOrO); - - Tensor caccO = make_identity_tensor( - Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor taccOcO = thr_mma.partition_C(caccO); // (MMA,MMA_M,MMA_K) - static_assert(decltype(size<0>(taccOcO))::value == 4); - // Convert to ((2, 2), MMA_M, MMA_K) then take only the row indices. - Tensor taccOcO_row = - logical_divide(taccOcO, Shape<_2>{})(make_coord(0, _), _, 0); - CUTE_STATIC_ASSERT_V(size(lse) == size(taccOcO_row)); // MMA_M - if (get<1>(taccOcO_row(0)) == 0) { -#pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - const int row = get<0>(taccOcO_row(mi)); - if (row < binfo.actual_seqlen_q - m_block * kBlockM) { - gLSE(row) = lse(mi); - } - } - } - - // Construct identity layout for sO - Tensor cO = make_identity_tensor( - make_shape(size<0>(sO), size<1>(sO))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - // Repeat the partitioning with identity layouts - Tensor tOcO = - gmem_thr_copy_O.partition_D(cO); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) - Tensor tOpO = make_tensor(make_shape(size<2>(tOgO))); - if (!Is_even_K) { -#pragma unroll - for (int k = 0; k < size(tOpO); ++k) { - tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d; - } - } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy(gmem_tiled_copy_O, tOrO, tOgO, tOcO, tOpO, - binfo.actual_seqlen_q - m_block * kBlockM); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_attn(const Params ¶ms) { - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - - // We want the fwd and bwd to generate the same dropout pattern (RNG), without - // restricting them to have the same number of threads or have to traverse the - // attention matrix in the same order. In the Philox RNG, we use the offset to - // store the batch, head, and the lane id (within a warp). We use the - // subsequence to store the location of the 16 x 32 blocks within the - // attention matrix. This way, as long as we have the batch, head, and the - // location of the 16 x 32 block within the attention matrix, we can generate - // the exact same dropout pattern. 
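// A minimal sketch of the counter packing that makes the dropout pattern
// reproducible, assuming a little-endian device where uint2{row, col}
// aliases a 64-bit subsequence (this mirrors the flash::philox call in
// apply_dropout later in this patch; the helper name is illustrative).
// Any thread mapping that lands on the same 16 x 32 tile feeds the same
// (seed, subsequence, offset) triple to Philox and so draws the same bits.
#include <cstdint>

inline unsigned long long tile_subsequence(uint32_t block_row,
                                           uint32_t block_col) {
  return (static_cast<unsigned long long>(block_col) << 32) | block_row;
}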
- - flash::compute_attn_1rowblock(params, bidb, bidh, - m_block); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -} // namespace flash -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_launch_template.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_launch_template.h deleted file mode 100644 index eeca65e42..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/flash_fwd_launch_template.h +++ /dev/null @@ -1,343 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. - ******************************************************************************/ - -#pragma once - -// #include - -#include "flash.h" -#include "flash_fwd_kernel.h" -#include "static_switch.h" -namespace brt { -namespace cuda { -namespace kernel { -template -__global__ void flash_fwd_kernel(Flash_fwd_params params) { - flash::compute_attn(params); -} - -template -void run_flash_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr size_t smem_size = Kernel_traits::kSmemSize; - // printf("smem_size = %d\n", smem_size); - - // Work-around for gcc 7. It doesn't like nested BOOL_SWITCH. - // https://github.com/kokkos/kokkos-kernels/issues/349 - // https://github.com/HazyResearch/flash-attention/issues/21 - - const int num_m_block = - (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid(num_m_block, params.b, params.h); - // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we - // need to check for cu_seqlens_q as well. - const bool is_even_N = params.cu_seqlens_q == nullptr && - params.cu_seqlens_k == nullptr && - params.seqlen_k % Kernel_traits::kBlockN == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - const bool return_softmax = params.p_ptr != nullptr; - BOOL_SWITCH(is_even_N, IsEvenNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - BOOL_SWITCH(return_softmax, ReturnSoftmaxConst, [&] { - // Will only return softmax if dropout, to reduce compilation time. 
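// For reference, a BOOL_SWITCH-style macro in the shape of the
// static_switch.h header this patch moves is usually written as below: the
// runtime bool selects one of two branches in which the flag is a
// compile-time constant, so the lambda body can use it as a template
// argument. This is a sketch of the idiom, not a verbatim copy.
#define BOOL_SWITCH_SKETCH(COND, CONST_NAME, ...)  \
  [&] {                                            \
    if (COND) {                                    \
      static constexpr bool CONST_NAME = true;     \
      return __VA_ARGS__();                        \
    } else {                                       \
      static constexpr bool CONST_NAME = false;    \
      return __VA_ARGS__();                        \
    }                                              \
  }()
// Usage: BOOL_SWITCH_SKETCH(is_even_K, IsEvenK, [&] { run<IsEvenK>(p); });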
- auto kernel = &flash_fwd_kernel < Kernel_traits, Is_dropout, Is_causal, - IsEvenNConst, IsEvenKConst, ReturnSoftmaxConst && Is_dropout > ; - // auto kernel = &flash_fwd_kernel; - if (smem_size >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, - // smem_size)); - cudaFuncSetAttribute( - kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); - } - int ctas_per_sm; - cudaError status_ = cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size); - // printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), - // ctas_per_sm); - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); - }); -} - -template -void run_mha_fwd_hdim32(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 32; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - }); - }); -} - -template -void run_mha_fwd_hdim64(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 64; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr (!Is_dropout) { - // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower - // Using block size (64 x 256) is 27% slower for seqlen=2k - // Using block size (256 x 64) is 85% slower for seqlen=2k, because of - // register spilling - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - }); - }); -} - -template -void run_mha_fwd_hdim96(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 96; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - // TODO: get real is_sm8x - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's - // square), - if (is_sm8x) { - if constexpr (!Is_causal) { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); These two are always slower - // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); - }); - }); -} - -template -void run_mha_fwd_hdim128(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 128; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr (!Is_dropout) { - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's - // square), and 
128 x 32 (48 KB smem) is the fastest for non-causal - // since we get 2 CTAs per SM. - if (is_sm8x) { - if constexpr (!Is_causal) { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); Using 8 warps (128 - // x 128 and 256 x 64) is 28% slower for seqlen=2k - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); 1st ones are good - // for H100, A100 2nd one is good for A6000 bc we get slightly better - // occupancy - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - }); - }); -} - -template -void run_mha_fwd_hdim160(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 160; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For A100, H100, 128 x 32 is the fastest. - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's - // square), and 128 x 64 with 8 warps is the fastest for non-causal. - if (is_sm8x) { - if constexpr (!Is_causal) { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - }); - }); -} - -template -void run_mha_fwd_hdim192(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 192; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr (!Is_dropout) { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - }); - }); -} - -template -void run_mha_fwd_hdim224(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 224; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if 
(max_smem_per_block >= 2 * Headdim * (128 + 2 * 64)) { // 112 KB - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); We can't do 128 x 32 with 8 - // warps because with headdim 224, kBlockKSmem = 32. If we have N = 32, - // there are only 1024 elements to load at once, where each load is 8 - // elements. This means we can only use 128 threads and not 256 threads. - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - }); - }); -} - -template -void run_mha_fwd_hdim256(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr int Headdim = 256; - int device; - cudaGetDevice(&device); - int max_smem_per_sm, max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_sm, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device); - status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, - // max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For A100, we want to run with 128 x 64 (128KB smem). - // For H100 we want to run with 64 x 64 (96KB smem) since then we can get - // 2 CTAs per SM. - if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64) && - max_smem_per_sm < 4 * Headdim * (64 + 2 * 64)) { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd< - Flash_fwd_kernel_traits, - Is_dropout, Is_causal>(params, stream); - } - // 64 KB - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); 96 KB - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - }); - }); -} -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits.h deleted file mode 100644 index 7458fd168..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits.h +++ /dev/null @@ -1,392 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. 
- ******************************************************************************/ - -#pragma once - -#include "cute/algorithm/copy.hpp" - -#include "cutlass/cutlass.h" -#include "cutlass/layout/layout.h" -#include - -using namespace cute; -namespace brt { -namespace cuda { -namespace kernel { -template -struct Flash_kernel_traits { - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using Element = elem_type; - static constexpr bool Has_cp_async = true; -#else - using Element = cutlass::half_t; - static constexpr bool Has_cp_async = false; -#endif - - using ElementAccum = float; - using index_t = uint32_t; - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using MMA_Atom_Arch = - std::conditional_t, - MMA_Atom, - MMA_Atom>; - using ValLayoutMNK = Layout>; -#else - using MMA_Atom_Arch = MMA_Atom; - using ValLayoutMNK = Layout>; -#endif - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; -#else - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; -#endif -}; - -// If Share_Q_K_smem is true, that forces Is_Q_in_regs to be true -template > -struct Flash_fwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; - static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; - - // The number of threads. - static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = - kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; - - using TiledMma = - TiledMMA, _1, _1>>, // 4x1x1 or 8x1x1 thread - // group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for - // 16x16x16 MMA and LDSM - - using SmemLayoutAtomQ = decltype(composition( - Swizzle{}, - // This has to be kBlockKSmem, using kHeadDim gives wrong results for - // d=128 - Layout>, Stride, _1>>{})); - using SmemLayoutQ = decltype(tile_to_shape( - SmemLayoutAtomQ{}, Shape, Int>{})); - - using SmemLayoutKV = decltype(tile_to_shape( - SmemLayoutAtomQ{}, Shape, Int>{})); - - // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 - using SmemLayoutAtomVtransposedNoSwizzle = - Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomVtransposed = decltype(composition( - Swizzle{}, SmemLayoutAtomVtransposedNoSwizzle{})); - using SmemLayoutVtransposed = decltype(tile_to_shape( - SmemLayoutAtomVtransposed{}, Shape, Int>{})); - // Maybe the VtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? 
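// The tiling constants above, checked as a worked example: the smem K
// extent is 64 columns when 64 divides the head dimension (3 swizzle
// bits), otherwise 32 columns (2 swizzle bits).
constexpr int block_k_smem(int head_dim) {
  return head_dim % 64 == 0 ? 64 : 32;
}
constexpr int swizzle_bits(int head_dim) {
  return block_k_smem(head_dim) == 32 ? 2 : 3;
}
static_assert(block_k_smem(128) == 64 && swizzle_bits(128) == 3, "d=128");
static_assert(block_k_smem(96) == 32 && swizzle_bits(96) == 2, "d=96");
static_assert(block_k_smem(160) == 32 && swizzle_bits(160) == 2, "d=160");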
- using SmemLayoutVtransposedNoSwizzle = - decltype(tile_to_shape(SmemLayoutAtomVtransposedNoSwizzle{}, - Shape, Int>{})); - // using SmemLayoutVtransposedNoSwizzle = - // decltype(SmemLayoutVtransposed{}.layout_fn()); - - using SmemLayoutAtomO = decltype(composition( - Swizzle{}, - Layout, Int>, Stride, _1>>{})); - using SmemLayoutO = decltype(tile_to_shape( - SmemLayoutAtomO{}, Shape, Int>{})); - using SmemCopyAtomO = Copy_Atom; - - static constexpr int kSmemQCount = size(SmemLayoutQ{}); - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemSize = Share_Q_K_smem - ? std::max(kSmemQSize, kSmemKVSize) - : kSmemQSize + kSmemKVSize; - - static constexpr int kGmemElemsPerLoad = - sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, - "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because - // of bank conflicts. For example, for d=128, smem is split into 2 "pages", - // each page takes care of columns 0-63 and 64-127. If we have 16 threads per - // row for gmem read, when we write to smem, thread 0 - 7 will write to the - // first page and thread 8 - 15 will write to the second page, to the same - // banks. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, - "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout< - Shape, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we - // won't be reading from the same address by the same threadblock. This is - // slightly faster. - using Gmem_copy_struct = std::conditional_t< - Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; - using GmemTiledCopyQKV = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRowP == 0, - "kNThreads must be a multiple of kGmemThreadsPerRowP"); - using GmemLayoutAtomP = Layout< - Shape, Int>, - Stride, _1>>; - - using GmemTiledCopyP = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store -}; - -// Is_V_in_regs is an option to reduce smem usage, but will increase register -// pressue. No_double_buffer is another option to reduce smem usage, but will -// slow things down. -template > -struct Flash_bwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Is_V_in_regs = Is_V_in_regs_; - static constexpr bool No_double_buffer = No_double_buffer_; - - // The number of threads. 
- static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = - kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; - - static constexpr int AtomLayoutMSdP = AtomLayoutMSdP_; - static_assert(kNWarps % AtomLayoutMSdP == 0); - static_assert(kNWarps % AtomLayoutNdKV == 0); - static_assert(kNWarps % AtomLayoutMdQ == 0); - - using TiledMmaSdP = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 - // MMA and LDSM - - using TiledMmadKV = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 - // MMA and LDSM - - using TiledMmadQ = - TiledMMA, Int, - _1>>, // 2x4x1 or 4x2x1 thread group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for - // 16x16x16 MMA and LDSM - - using SmemLayoutAtomQdO = decltype(composition( - Swizzle{}, - Layout>, Stride, _1>>{})); - using SmemLayoutQdO = decltype(tile_to_shape( - SmemLayoutAtomQdO{}, make_shape(Int{}, Int{}))); - - using SmemLayoutAtomKV = decltype(composition( - Swizzle{}, - Layout, Int>, - Stride, _1>>{})); - using SmemLayoutKV = decltype(tile_to_shape( - // SmemLayoutAtomQdO{}, - SmemLayoutAtomKV{}, make_shape(Int{}, Int{}))); - - using SmemLayoutAtomKtransposedNoSwizzle = - Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomKtransposed = decltype(composition( - Swizzle{}, SmemLayoutAtomKtransposedNoSwizzle{})); - using SmemLayoutKtransposed = - decltype(tile_to_shape(SmemLayoutAtomKtransposed{}, - make_shape(Int{}, Int{}))); - // Maybe the KtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? - using SmemLayoutKtransposedNoSwizzle = - decltype(tile_to_shape(SmemLayoutAtomKtransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutKtransposedNoSwizzle = - // decltype(SmemLayoutKtransposed{}.layout_fn()); - - // TODO: generalize to other values of kBlockN - // TODO: what should be the Swizzle here? 3 is faster than 1, and 1 is faster - // than 2 static constexpr int kPBlockN = kBlockN; - static_assert(kBlockN >= 64); - // TD [2023-03-19]: Idk why kPBlockN = 16 and kSwizzlePdS=3 is the fastest. - static constexpr int kPBlockN = 64; - static_assert(kPBlockN == 16 || kPBlockN == 32 || kPBlockN == 64); - // static constexpr int kSwizzlePdS = kPBlockN == 16 ? 1 : (kPBlockN == 32 ? 
2 - // : 3); - static constexpr int kSwizzlePdS = 3; - using SmemLayoutAtomPdS = decltype(composition( - Swizzle{}, - Layout, Int>, Stride, _1>>{})); - using SmemLayoutPdS = decltype(tile_to_shape( - SmemLayoutAtomPdS{}, make_shape(Int{}, Int{}))); - using SmemLayoutAtomPdStransposedNoSwizzle = - Layout, Int>, Stride<_1, Int>>; - using SmemLayoutAtomPdStransposed = decltype(composition( - Swizzle{}, SmemLayoutAtomPdStransposedNoSwizzle{})); - using SmemLayoutPdStransposed = - decltype(tile_to_shape(SmemLayoutAtomPdStransposed{}, - make_shape(Int{}, Int{}))); - using SmemLayoutPdStransposedNoSwizzle = - decltype(tile_to_shape(SmemLayoutAtomPdStransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutPdStransposedNoSwizzle = - // decltype(SmemLayoutPdStransposed{}.layout_fn()); - using SmemCopyAtomPdS = Copy_Atom; - - using SmemLayoutAtomQdOtransposedNoSwizzle = - Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomQdOtransposed = decltype(composition( - Swizzle{}, SmemLayoutAtomQdOtransposedNoSwizzle{})); - using SmemLayoutQdOtransposed = - decltype(tile_to_shape(SmemLayoutAtomQdOtransposed{}, - make_shape(Int{}, Int{}))); - using SmemLayoutQdOtransposedNoSwizzle = - decltype(tile_to_shape(SmemLayoutAtomQdOtransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutQdOtransposedNoSwizzle = - // decltype(SmemLayoutQdOtransposed{}.layout_fn()); - - using SmemLayoutAtomdKV = decltype(composition( - Swizzle{}, - Layout>, Stride, _1>>{})); - using SmemLayoutdKV = decltype(tile_to_shape( - SmemLayoutAtomdKV{}, make_shape(Int{}, Int{}))); - using SmemCopyAtomdKV = Copy_Atom; - - using SmemLayoutAtomdQ = decltype(composition( - Swizzle{}, - Layout>, Stride, _1>>{})); - using SmemLayoutdQ = decltype(tile_to_shape( - SmemLayoutAtomdQ{}, make_shape(Int{}, Int{}))); - using SmemCopyAtomdQ = Copy_Atom; - - static constexpr int kSmemQdOCount = - size(SmemLayoutQdO{}) * - (No_double_buffer ? 2 : 3); // Double buffer for sQ - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemdSCount = size(SmemLayoutPdS{}); - static constexpr int kSmemPCount = size(SmemLayoutPdS{}); - static constexpr int kSmemdQCount = size(SmemLayoutdQ{}); - static constexpr int kSmemdPsumCount = kBlockM; - static constexpr int kSmemQdOSize = kSmemQdOCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemdSSize = kSmemdSCount * sizeof(Element); - static constexpr int kSmemPSize = kSmemPCount * sizeof(Element); - static constexpr int kSmemdQSize = kSmemdQCount * sizeof(Element); - static constexpr int kSmemdPsumSize = kSmemdPsumCount * sizeof(ElementAccum); - static constexpr int kSmemSize = - kSmemQdOSize + - (!Is_V_in_regs - ? kSmemKVSize + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize) - : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + - std::max(kSmemPSize, kSmemdQSize))); - static constexpr int kSmemSize1colblock = - kSmemQdOSize + - (!Is_V_in_regs - ? 
kSmemKVSize + kSmemdSSize + kSmemPSize - : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + kSmemPSize)); - static constexpr int kSmemSize1rowblock = - kSmemQdOSize / 3 * 2 + kSmemKVSize / 2 * 3 + kSmemdSSize + kSmemPSize; - - static constexpr int kGmemElemsPerLoad = - sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, - "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem instead of kHeadDim here to avoid bank conflicts, but - // doesn't seem to affect speed in practice. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, - "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout< - Shape, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we - // won't be reading from the same address by the same threadblock. This is - // slightly faster. - using Gmem_copy_struct = std::conditional_t< - Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; - using GmemTiledCopyQKV = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopydO = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydKV = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydQ = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemLayoutAtomdQaccum = std::conditional_t< - kBlockKSmem == 32, - Layout, // Thread layout, 8 threads per row - Stride<_8, _1>>, - Layout, // Thread layout, 16 threads per row - Stride<_16, _1>>>; - using GmemTiledCopydQaccum = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtomdQaccum{}, - Layout>{})); // Val layout, 4 vals per store - - using GmemTiledCopydQaccumAtomicAdd = decltype(make_tiled_copy( - Copy_Atom{}, - Layout, // Thread layout, 8 threads per row - Stride<_32, _1>>{}, - Layout>{})); // Val layout, 1 val per store -}; -} // namespace kernel -} // namespace cuda -} // namespace brt -//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits_sm90.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits_sm90.h deleted file mode 100644 index 93c9344d7..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/kernel_traits_sm90.h +++ /dev/null @@ -1,169 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. 
- ******************************************************************************/ - -#pragma once - -#include "cute/algorithm/copy.hpp" - -#include "cutlass/cutlass.h" -#include "cutlass/layout/layout.h" -#include - -using namespace cute; -namespace brt { -namespace cuda { -namespace kernel { -template -struct Flash_kernel_traits_sm90 { - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using Element = elem_type; - static constexpr bool Has_cp_async = true; -#else - using Element = cutlass::half_t; - static constexpr bool Has_cp_async = false; -#endif - - using ElementAccum = float; - using index_t = uint32_t; - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using MMA_Atom_Arch = - std::conditional_t, - MMA_Atom, - MMA_Atom>; - using ValLayoutMNK = Layout>; -#else - using MMA_Atom_Arch = MMA_Atom; - using ValLayoutMNK = Layout>; -#endif - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; -#else - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; -#endif -}; - -template > -struct Flash_fwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; - static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; - - // The number of threads. - static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = - kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; - - using TiledMma = - TiledMMA, _1, _1>>, // 4x1x1 or 8x1x1 thread - // group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for - // 16x16x16 MMA and LDSM - - using SmemLayoutAtomQ = decltype(composition( - Swizzle{}, - // This has to be kBlockKSmem, using kHeadDim gives wrong results for - // d=128 - Layout>, Stride, _1>>{})); - using SmemLayoutQ = decltype(tile_to_shape( - SmemLayoutAtomQ{}, Shape, Int>{})); - - using SmemLayoutKV = decltype(tile_to_shape( - SmemLayoutAtomQ{}, Shape, Int>{})); - - using SmemLayoutAtomVtransposed = - decltype(composition(Swizzle{}, - // This has to be kBlockN and not 8, otherwise we get - // wrong results for d=128 - Layout, Int>, - Stride<_1, Int>>{})); - using SmemLayoutVtransposed = decltype(tile_to_shape( - SmemLayoutAtomVtransposed{}, Shape, Int>{})); - // Maybe the VtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? 
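// Worked numbers for the shared-memory budget computed just below,
// assuming 2-byte (fp16/bf16) elements and an illustrative 128 x 64 tile
// at head dimension 128; Share_Q_K_smem lets the Q tile alias the K/V
// buffers, halving the footprint.
constexpr int smem_bytes(int kBlockM, int kBlockN, int kHeadDim,
                         bool share_q_k) {
  const int q = kBlockM * kHeadDim * 2;      // Q tile
  const int kv = 2 * kBlockN * kHeadDim * 2; // K and V tiles
  return share_q_k ? (q > kv ? q : kv) : q + kv;
}
static_assert(smem_bytes(128, 64, 128, false) == 64 * 1024, "Q + KV");
static_assert(smem_bytes(128, 64, 128, true) == 32 * 1024, "max(Q, KV)");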
- using SmemLayoutVtransposedNoSwizzle = - decltype(SmemLayoutVtransposed{}.layout_fn()); - - using SmemLayoutAtomO = decltype(composition( - Swizzle{}, - Layout, Int>, Stride, _1>>{})); - using SmemLayoutO = decltype(tile_to_shape( - SmemLayoutAtomO{}, Shape, Int>{})); - using SmemCopyAtomO = Copy_Atom; - - static constexpr int kSmemQCount = size(SmemLayoutQ{}); - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemSize = Share_Q_K_smem - ? std::max(kSmemQSize, kSmemKVSize) - : kSmemQSize + kSmemKVSize; - - static constexpr int kGmemElemsPerLoad = - sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, - "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because - // of bank conflicts. For example, for d=128, smem is split into 2 "pages", - // each page takes care of columns 0-63 and 64-127. If we have 16 threads per - // row for gmem read, when we write to smem, thread 0 - 7 will write to the - // first page and thread 8 - 15 will write to the second page, to the same - // banks. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, - "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout< - Shape, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we - // won't be reading from the same address by the same threadblock. This is - // slightly faster. - using Gmem_copy_struct = std::conditional_t< - Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; - using GmemTiledCopyQKV = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRowP == 0, - "kNThreads must be a multiple of kGmemThreadsPerRowP"); - using GmemLayoutAtomP = Layout< - Shape, Int>, - Stride, _1>>; - - using GmemTiledCopyP = decltype(make_tiled_copy( - Copy_Atom{}, GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store -}; -} // namespace kernel -} // namespace cuda -} // namespace brt -//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/softmax.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/softmax.h deleted file mode 100644 index 796a92b76..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/softmax.h +++ /dev/null @@ -1,332 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. 
- ******************************************************************************/ - -#pragma once - -#include - -#include - -#include -#include - -#include "philox.cuh" -#include "utils.h" -namespace brt { -namespace cuda { -namespace kernel { -namespace flash { - -using namespace cute; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -__device__ inline void thread_reduce_(Tensor const &tensor, - Tensor &summary, - Operator &op) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(summary) == size<0>(tensor)); -#pragma unroll - for (int mi = 0; mi < size<0>(tensor); mi++) { - summary(mi) = zero_init ? tensor(mi, 0) : op(summary(mi), tensor(mi, 0)); -#pragma unroll - for (int ni = 1; ni < size<1>(tensor); ni++) { - summary(mi) = op(summary(mi), tensor(mi, ni)); - } - } -} - -template -__device__ inline void quad_allreduce_(Tensor &dst, - Tensor &src, - Operator &op) { - CUTE_STATIC_ASSERT_V(size(dst) == size(src)); -#pragma unroll - for (int i = 0; i < size(dst); i++) { - dst(i) = Allreduce<4>::run(src(i), op); - } -} - -template -__device__ inline void reduce_(Tensor const &tensor, - Tensor &summary, - Operator &op) { - thread_reduce_(tensor, summary, op); - quad_allreduce_(summary, summary, op); -} - -template -__device__ inline void reduce_max(Tensor const &tensor, - Tensor &max) { - MaxOp max_op; - reduce_(tensor, max, max_op); -} - -template -__device__ inline void reduce_sum(Tensor const &tensor, - Tensor &sum) { - SumOp sum_op; - reduce_(tensor, sum, sum_op); -} - -// Apply the exp to all the elements. -template -inline __device__ void scale_apply_exp2(Tensor &tensor, - Tensor const &max, - const float scale) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); -#pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - // If max is -inf, then all elements must have been -inf (possibly due to - // masking). We don't want (-inf - (-inf)) since that would give NaN. If we - // don't have float around M_LOG2E the multiplication is done in fp64. - const float max_scaled = - max(mi) == -INFINITY ? 0.f - : max(mi) * (Scale_max ? scale : float(M_LOG2E)); -#pragma unroll - for (int ni = 0; ni < size<1>(tensor); ++ni) { - // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - - // max * log_2(e)) This allows the compiler to use the ffma - // instruction instead of fadd and fmul separately. - tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); - } - } -} - -// Apply the exp to all the elements. -template -inline __device__ void max_scale_exp2_sum(Tensor &tensor, - Tensor &max, - Tensor &sum, - const float scale) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); -#pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - MaxOp max_op; - max(mi) = zero_init ? tensor(mi, 0) : max_op(max(mi), tensor(mi, 0)); -#pragma unroll - for (int ni = 1; ni < size<1>(tensor); ni++) { - max(mi) = max_op(max(mi), tensor(mi, ni)); - } - max(mi) = Allreduce<4>::run(max(mi), max_op); - // If max is -inf, then all elements must have been -inf (possibly due to - // masking). We don't want (-inf - (-inf)) since that would give NaN. 
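// The identity behind the exp2f rewrite used in this file, as a scalar
// sketch: exp(x - m) == exp2(x * log2(e) - m * log2(e)), and with the
// softmax scale folded into scale_log2e the subtract and multiply fuse
// into one FFMA feeding EX2. The -INFINITY guard matches the comment
// above: a fully masked row would otherwise produce (-inf) - (-inf) = NaN.
#include <cmath>

inline float softmax_term(float x, float row_max, float scale_log2e) {
  const float max_scaled =
      (row_max == -INFINITY) ? 0.f : row_max * scale_log2e;
  return exp2f(x * scale_log2e - max_scaled);
}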
- const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale; - sum(mi) = 0; -#pragma unroll - for (int ni = 0; ni < size<1>(tensor); ++ni) { - // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - - // max * log_2(e)) This allows the compiler to use the ffma - // instruction instead of fadd and fmul separately. - tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); - sum(mi) += tensor(mi, ni); - } - SumOp sum_op; - sum(mi) = Allreduce<4>::run(sum(mi), sum_op); - } -} - -template -inline __device__ void apply_mask(Tensor &tensor, - const uint32_t max_seqlen_k, - const uint32_t col_idx_offset_ = 0) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout::rank == 2, "Only support 2D Tensor"); - const uint32_t lane_id = threadIdx.x % 32; - const uint32_t col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; -#pragma unroll - for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { - const uint32_t col_idx_base = col_idx_offset + nj * 8; -#pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const uint32_t col_idx = col_idx_base + j; - if (col_idx >= max_seqlen_k) { -// Without the "make_coord" we get wrong results -#pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - tensor(mi, make_coord(j, nj)) = -INFINITY; - } - } - } - } -} - -template -inline __device__ void apply_mask_causal(Tensor &tensor, - const uint32_t col_idx_offset_, - const uint32_t max_seqlen_k, - const uint32_t row_idx_offset_, - const uint32_t warp_row_stride) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout::rank == 2, "Only support 2D Tensor"); - const uint32_t lane_id = threadIdx.x % 32; - // const uint32_t row_idx_offset = row_idx_offset_ + lane_id / 4; - const uint32_t row_idx_offset = row_idx_offset_; - const uint32_t col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; -#pragma unroll - for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { - const uint32_t row_idx_base = row_idx_offset + mi * warp_row_stride; -#pragma unroll - for (int i = 0; i < size<0, 0>(tensor); ++i) { - const uint32_t row_idx = row_idx_base + i * 8; - const uint32_t col_idx_limit = std::min(max_seqlen_k, row_idx + 1); -#pragma unroll - for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { - const uint32_t col_idx_base = col_idx_offset + nj * 8; -#pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const uint32_t col_idx = col_idx_base + j; - if (col_idx >= col_idx_limit) { - tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY; - } - } - } - // if (cute::thread0()) { - // printf("mi = %d, i = %d, row_idx = %d, max_seqlen_k = %d\n", mi, i, - // row_idx, max_seqlen_k); print(tensor(make_coord(i, mi), _)); - // // print(tensor(_, j + nj * size<1, 0>(tensor))); - // } - } - } -} - -template -inline __device__ void apply_mask_causal_w_idx( - Tensor &tensor, - Tensor const &idx_rowcol, const uint32_t col_idx_offset_, - const uint32_t max_seqlen_k, const uint32_t row_idx_offset_) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 2, "Only support 2D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(tensor) == size<0>(idx_rowcol)); - CUTE_STATIC_ASSERT_V(size<1>(tensor) == size<1>(idx_rowcol)); -#pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - const uint32_t col_idx_limit = - std::min(max_seqlen_k, 1 + row_idx_offset_ + get<0>(idx_rowcol(mi, 0))); -#pragma unroll - for (int ni = 0; ni < size<1, 1>(tensor); ++ni) { - if 
(col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) { - tensor(mi, ni) = -INFINITY; - } - } - // if (cute::thread0()) { - // printf("ni = %d, j = %d, col_idx = %d, max_seqlen_k = %d\n", ni, j, - // col_idx, max_seqlen_k); print(tensor(_, make_coord(j, ni))); - // // print(tensor(_, j + ni * size<1, 0>(tensor))); - // } - } -} - -template -inline __device__ void -apply_dropout(Tensor &tensor, uint8_t p_dropout_in_uint8_t, - unsigned long long seed, unsigned long long offset, - uint32_t block_row_start, uint32_t block_col_start, - uint32_t block_row_stride) { - // tensor has shape (8, MMA_M, MMA_N / 2) - using T = typename Engine::value_type; - auto encode_dropout = [](bool keep, T val) { - return keep ? val : (encode_dropout_in_sign_bit ? -val : T(0)); - }; - static_assert(decltype(size<2>(tensor))::value % 2 == 0); - const uint16_t p_dropout_8bit_in_uint16_t = uint16_t(p_dropout_in_uint8_t); - const uint32_t p_dropout_8bit_in_uint32_t = - (uint32_t(p_dropout_8bit_in_uint16_t) << 16) | - uint32_t(p_dropout_8bit_in_uint16_t); -// if (cute::thread0()) { printf("threshold2 = 0x%x\n", -// p_dropout_8bit_in_uint32_t); } -#pragma unroll - for (int m = 0; m < size<1>(tensor); - ++m, block_row_start += block_row_stride) { - uint2 rowcol = make_uint2(block_row_start, block_col_start); -#pragma unroll - for (int n = 0; n < size<2>(tensor) / 2; ++n, ++rowcol.y) { - // if (cute::thread(32, 0)) { printf("m = %d, n = %d, row = %d, col = - // %d\n", m, n, int(rowcol.x), int(rowcol.y));} - uint4 random_uint4 = flash::philox( - seed, reinterpret_cast(rowcol), offset); - // if (cute::thread0()) { printf("philox = %u, %d, %d, %d\n", - // random_uint4.x, random_uint4.y, random_uint4.z, random_uint4.w);} - uint8_t(&rnd_8)[16] = reinterpret_cast(random_uint4); - // Special implementation for 16-bit types: we duplicate the threshold to - // the low and high 16 bits of a 32-bit value, then use the f16x2 - // comparison instruction to get a mask. The low 16 bits of the mask will - // be either 0xffff or 0x0000, and the high 16 bits will be either 0xffff - // or 0x0000, depending on whether the random value is less than the - // threshold. We then do a bit-wise AND between the mask and the original - // value (in 32-bit). We're exploiting the fact that floating point - // comparison is equivalent to integer comparison, since we're comparing - // unsigned integers whose top 8-bits are zero. 
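// A scalar reference for the packed comparison below (same keep/drop
// convention as the encode_dropout lambda above): a random byte keeps the
// element iff it is <= the 8-bit threshold, and a dropped element becomes
// 0, or -x when the sign-bit encoding is enabled.
#include <cstdint>

inline float dropout_ref(float x, uint8_t rnd, uint8_t threshold,
                         bool encode_in_sign_bit) {
  const bool keep = rnd <= threshold;
  return keep ? x : (encode_in_sign_bit ? -x : 0.f);
}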
- if (!encode_dropout_in_sign_bit && - (std::is_same::value || - std::is_same::value)) { - uint16_t rnd_16[16]; -#pragma unroll - for (int i = 0; i < 16; i++) { - rnd_16[i] = uint16_t(rnd_8[i]); - } - uint32_t(&rnd_32)[8] = reinterpret_cast(rnd_16); -#pragma unroll - for (int j = 0; j < 2; j++) { - Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); -// if (cute::thread0()) { printf("random = 0x%x, 0x%x, 0x%x, 0x%x\n", rnd_32[j * -// 4 + 0], rnd_32[j * 4 + 1], rnd_32[j * 4 + 2], rnd_32[j * 4 + 3]); } if -// (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", -// tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); } -#pragma unroll - for (int i = 0; i < 4; i++) { - uint32_t mask; - asm volatile("set.le.u32.f16x2 %0, %1, %2;\n" - : "=r"(mask) - : "r"(rnd_32[j * 4 + i]), - "r"(p_dropout_8bit_in_uint32_t)); - tensor_uint32(i) &= mask; - } - // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, - // 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), - // tensor_uint32(3)); } - } - } else { -#pragma unroll - for (int j = 0; j < 2; j++) { -#pragma unroll - for (int i = 0; i < 8; i++) { - tensor(i, m, n * 2 + j) = - encode_dropout(rnd_8[j * 8 + i] <= p_dropout_in_uint8_t, - tensor(i, m, n * 2 + j)); - } - Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); - // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, - // 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), - // tensor_uint32(3)); } - } - } - // // if ((threadIdx.x == 0) && (blockIdx.x == 0) && (blockIdx.y == 0)) { - // // printf("n = %d, ph Philox: %u, %u, %u, %u\n", n, rnd_8.x, - // rnd_8.y, rnd_8.z, rnd_8.w); - // // } - } - } -} - -} // namespace flash -} // namespace kernel -} // namespace cuda -} // namespace brt \ No newline at end of file diff --git a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/utils.h b/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/utils.h deleted file mode 100644 index bf9ad83b2..000000000 --- a/runtime/lib/backends/cuda/providers/default/flash_attn/kernels/utils.h +++ /dev/null @@ -1,433 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, Tri Dao. 
- ******************************************************************************/ - -#pragma once - -#include -#include -#include - -#include - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 -#include -#endif - -#include -#include - -#include -#include -#include -#include - -//////////////////////////////////////////////////////////////////////////////////////////////////// -namespace brt { -namespace cuda { -namespace kernel { -namespace flash { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template inline __device__ uint32_t relu2(const uint32_t x); - -template <> -inline __device__ uint32_t relu2(const uint32_t x) { - uint32_t res; - const uint32_t zero = 0u; -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - asm volatile("max.f16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); -#else - asm volatile("{\n" - "\t .reg .f16x2 sela;\n" - "\t set.gtu.u32.f16x2 sela, %1, %2;\n" - "\t and.b32 %0, sela, %1;\n" - "}\n" - : "=r"(res) - : "r"(x), "r"(zero)); -#endif - return res; -} - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 -template <> -inline __device__ uint32_t relu2(const uint32_t x) { - uint32_t res; - const uint32_t zero = 0u; - asm volatile("max.bf16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); - return res; -} -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - -template inline __device__ uint32_t convert_relu2(const float2 x); - -template <> -inline __device__ uint32_t convert_relu2(const float2 x) { - uint32_t res; - const uint32_t a = reinterpret_cast(x.x); - const uint32_t b = reinterpret_cast(x.y); - asm volatile("cvt.rn.relu.f16x2.f32 %0, %1, %2;\n" - : "=r"(res) - : "r"(b), "r"(a)); - return res; -} - -template <> -inline __device__ uint32_t convert_relu2(const float2 x) { - uint32_t res; - const uint32_t a = reinterpret_cast(x.x); - const uint32_t b = reinterpret_cast(x.y); - asm volatile("cvt.rn.relu.bf16x2.f32 %0, %1, %2;\n" - : "=r"(res) - : "r"(b), "r"(a)); - return res; -} - -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template inline __device__ float2 half2_unpack(uint32_t a); - -template <> inline __device__ float2 half2_unpack<__half>(uint32_t a) { - return __half22float2(reinterpret_cast<__half2(&)>(a)); -} - -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 -template <> inline __device__ float2 half2_unpack<__nv_bfloat16>(uint32_t a) { - return __bfloat1622float2(reinterpret_cast<__nv_bfloat162(&)>(a)); -} -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Convert two half2's or bf162's into float, then take their dot product. -template -inline __device__ float hfma2_to_float(const uint32_t a, const uint32_t b) { - float2 af = flash::half2_unpack(a); - float2 bf = flash::half2_unpack(b); - return af.x * bf.x + af.y * bf.y; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Converted two vectors of 8 half's or bf16's into float, then take their dot -// product. 
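// A plain-fp32 reference for hmulsum8 below: each uint32_t packs two
// half/bf16 values, so a uint4 carries 8 elements and the routine is an
// 8-wide dot product accumulated in fp32 (here fully unpacked for
// clarity).
inline float dot8_ref(const float (&a)[8], const float (&b)[8]) {
  float sum = 0.f;
  for (int i = 0; i < 8; ++i)
    sum += a[i] * b[i];
  return sum;
}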
-template -inline __device__ float hmulsum8(const uint4 a, const uint4 b) { - float sum; - sum = flash::hfma2_to_float(a.x, b.x); - sum += flash::hfma2_to_float(a.y, b.y); - sum += flash::hfma2_to_float(a.z, b.z); - sum += flash::hfma2_to_float(a.w, b.w); - return sum; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template struct MaxOp { - __device__ inline T operator()(T const &x, T const &y) { - return x > y ? x : y; - } -}; - -template <> struct MaxOp { - // This is slightly faster - __device__ inline float operator()(float const &x, float const &y) { - return max(x, y); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template struct SumOp { - __device__ inline T operator()(T const &x, T const &y) { return x + y; } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template struct Allreduce { - static_assert(THREADS == 32 || THREADS == 16 || THREADS == 8 || THREADS == 4); - template - static __device__ inline T run(T x, Operator &op) { - constexpr int OFFSET = THREADS / 2; - x = op(x, __shfl_xor_sync(uint32_t(-1), x, OFFSET)); - return Allreduce::run(x, op); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template <> struct Allreduce<2> { - template - static __device__ inline T run(T x, Operator &op) { - x = op(x, __shfl_xor_sync(uint32_t(-1), x, 1)); - return x; - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -gemm(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const &tCsA, - Tensor4 const &tCsB, TiledMma tiled_mma, TiledCopyA smem_tiled_copy_A, - TiledCopyB smem_tiled_copy_B, ThrCopyA smem_thr_copy_A, - ThrCopyB smem_thr_copy_B) { - CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M - CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N - CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K - Tensor tCrA_copy_view = smem_thr_copy_A.retile_D(tCrA); - CUTE_STATIC_ASSERT_V(size<1>(tCsA) == size<1>(tCrA_copy_view)); // M - Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); - CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N - if (!A_in_regs) { - cute::copy(smem_tiled_copy_A, tCsA(_, _, _0{}), tCrA_copy_view(_, _, _0{})); - } - if (!B_in_regs) { - cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); - } -#pragma unroll - for (int i = 0; i < size<2>(tCrA); ++i) { - if (i < size<2>(tCrA) - 1) { - if (!A_in_regs) { - cute::copy(smem_tiled_copy_A, tCsA(_, _, i + 1), - tCrA_copy_view(_, _, i + 1)); - } - if (!B_in_regs) { - cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), - tCrB_copy_view(_, _, i + 1)); - } - } - cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void -gemm_A_in_regs(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const &tCsB, - TiledMma tiled_mma, TiledCopy smem_tiled_copy_B, - ThrCopy smem_thr_copy_B) { - CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M - CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N - CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K - Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); - CUTE_STATIC_ASSERT_V(size<1>(tCsB) == 
size<1>(tCrB_copy_view)); // N - cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); -#pragma unroll - for (int i = 0; i < size<2>(tCrA); ++i) { - if (i < size<2>(tCrA) - 1) { - cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), - tCrB_copy_view(_, _, i + 1)); - } - cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Convert acc_layout from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, -// MMA_N)) -template -inline __device__ auto convert_layout_acc_rowcol(Layout acc_layout) { - static_assert(decltype(size<0>(acc_layout))::value == 4); - static_assert(decltype(rank(acc_layout))::value == 3); - auto l = logical_divide(acc_layout, Shape<_2>{}); // ((2, 2), MMA_M, MMA_N) - // TD [2023-08-13]: Idk why but get<0, 1>(l) doesn't work for Cutlass 3.2, I'm - // getting "int_tuple.hpp(74): error: conversion to inaccessible base class" - // return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, - // 0>(l), get<2>(l))); - return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), - make_layout(get<0>(get<0>(l)), get<2>(l))); -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Convert rowcol_layout from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), -// MMA_M, MMA_N / 2) if using m16n8k16, or to ((2, 2, 1), MMA_M, MMA_N) if using -// m16n8k8. -template -inline __device__ auto convert_layout_rowcol_Aregs(Layout rowcol_layout) { - using X = Underscore; - static_assert(decltype(size<0, 0>(rowcol_layout))::value == 2); - static_assert(decltype(size<1, 0>(rowcol_layout))::value == 2); - constexpr int mma_shape_K = get<2>(typename MMA_traits::Shape_MNK{}); - static_assert(mma_shape_K == 8 || mma_shape_K == 16); - constexpr int MMA_N_divisor = mma_shape_K == 8 ? 
1 : 2; - auto l = logical_divide( - rowcol_layout, - Shape>>{}); // ((2, MMA_M), (2, (2, MMA_N / - // 2))) - // TD [2023-08-13]: Same error as above on Cutlass 3.2 - // return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, - // 0>(l)), - // get<0, 1>(l), - // get<1, 1, 1>(l)); - return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), - get<0>(get<1>(get<1>(l)))), - get<1>(get<0>(l)), get<1>(get<1>(get<1>(l)))); -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ auto convert_type(Tensor const &tensor) { - using From_type = typename Engine::value_type; - constexpr int numel = decltype(size(tensor))::value; - cutlass::NumericArrayConverter convert_op; - // HACK: this requires tensor to be "contiguous" - auto frag = - convert_op(*reinterpret_cast *>( - tensor.data())); - return make_tensor(make_rmem_ptr(&frag), tensor.layout()); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void relu_(Tensor &tensor) { - constexpr int numel = decltype(size(tensor))::value; - static_assert(numel % 2 == 0); - using value_t = typename Engine::value_type; - // HACK: this requires tensor to be "contiguous" - Tensor tensor_uint32 = recast(tensor); -#pragma unroll - for (int i = 0; i < size(tensor_uint32); ++i) { - tensor_uint32(i) = relu2(tensor_uint32(i)); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// On SM80 and above, we can fuse fp32 -> fp16/bf16 conversion and relu into 1 -// instruction -template -inline __device__ auto convert_type_relu(Tensor const &tensor) { - using From_type = typename Engine::value_type; - static_assert(std::is_same_v || - std::is_same_v); - static_assert(std::is_same_v); - constexpr int numel = decltype(size(tensor))::value; - static_assert(numel % 2 == 0); -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - // HACK: this requires tensor to be "contiguous" - Tensor tensor_float2 = recast(tensor); - Tensor out_uint32 = make_tensor(tensor_float2.layout()); -#pragma unroll - for (int i = 0; i < size(out_uint32); ++i) { - out_uint32(i) = convert_relu2(tensor_float2(i)); - } - Tensor out = - make_tensor(make_rmem_ptr(out_uint32.data()), tensor.layout()); -#else - Tensor out = flash::convert_type(tensor); - flash::relu_(out); -#endif - return out; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Blocks until all but N previous cp.async.commit_group operations have -// committed. This differs from cute::cp_async_wait in that when N = 0 we don't -// call cp.async.wait_all (which is equivalent to commit_group then wait_group -// 0). Instead we just call cp.async.wait_group 0, which is slightly faster. 
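// A sketch of the commit/wait pairing this helper is built for, assuming a
// two-stage prefetch pipeline; load_tile_async and compute stand in for
// the cute::copy and gemm calls used elsewhere and are not real functions:
//
//   load_tile_async(0); cute::cp_async_fence();  // commit group for tile 0
//   load_tile_async(1); cute::cp_async_fence();  // commit group for tile 1
//   flash::cp_async_wait<1>(); __syncthreads();  // tile 0 ready
//   compute(0);                                  // overlaps tile 1's copy
//   flash::cp_async_wait<0>(); __syncthreads();  // all groups retired via
//   compute(1);                                  // wait_group 0, not wait_all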
-// https://github.com/NVIDIA/cutlass/blob/master/include/cute/arch/copy_sm80.hpp#L113
-template <int N> CUTE_HOST_DEVICE void cp_async_wait() {
-#if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED)
-  asm volatile("cp.async.wait_group %0;\n" ::"n"(N));
-#endif
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-template <bool Is_even_MN = true, bool Is_even_K = true,
-          bool Clear_OOB_MN = false, bool Clear_OOB_K = true,
-          typename TiledCopy, typename Engine0, typename Layout0,
-          typename Engine1, typename Layout1, typename Engine2,
-          typename Layout2, typename Engine3, typename Layout3>
-inline __device__ void
-copy(TiledCopy tiled_copy, Tensor<Engine0, Layout0> const &S,
-     Tensor<Engine1, Layout1> &D, Tensor<Engine2, Layout2> const &identity_MN,
-     Tensor<Engine3, Layout3> const &predicate_K, int max_MN = 0) {
-  CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
-  CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
-  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA
-  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M
-  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K
-  // There's no case where !Clear_OOB_K && Clear_OOB_MN
-  static_assert(!(Clear_OOB_MN && !Clear_OOB_K));
-#pragma unroll
-  for (int m = 0; m < size<1>(S); ++m) {
-    if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) {
-#pragma unroll
-      for (int k = 0; k < size<2>(S); ++k) {
-        if (Is_even_K || predicate_K(k)) {
-          cute::copy(tiled_copy, S(_, m, k), D(_, m, k));
-        } else if (Clear_OOB_K) {
-          cute::clear(D(_, m, k));
-        }
-      }
-    } else if (Clear_OOB_MN) {
-      cute::clear(D(_, m, _));
-    }
-  }
-  // TD [2023-04-13]: Strange that the code below can cause race condition.
-  // I think it's because the copies are under an if statement.
-  // if (Is_even_K) {
-  //   #pragma unroll
-  //   for (int m = 0; m < size<1>(S); ++m) {
-  //     if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) {
-  //       copy(tiled_copy, S(_, m, _), D(_, m, _));
-  //     } else if (Clear_OOB_MN) {
-  //       clear(D(_, m, _));
-  //     }
-  //   }
-  // } else { // It's slightly faster in this case if we iterate over K first
-  //   #pragma unroll
-  //   for (int k = 0; k < size<2>(S); ++k) {
-  //     if (predicate_K(k)) {
-  //       #pragma unroll
-  //       for (int m = 0; m < size<1>(S); ++m) {
-  //         if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) {
-  //           copy(tiled_copy, S(_, m, k), D(_, m, k));
-  //         } else if (Clear_OOB_MN) {
-  //           clear(D(_, m, k));
-  //         }
-  //       }
-  //     } else if (Clear_OOB_K) { // There's no case where !Clear_OOB_K &&
-  //                               // Clear_OOB_MN
-  //       if (Clear_OOB_MN || Is_even_MN) {
-  //         clear(D(_, _, k));
-  //       } else {
-  //         #pragma unroll
-  //         for (int m = 0; m < size<1>(S); ++m) {
-  //           if (!(Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN)) {
-  //             clear(D(_, m, k));
-  //           }
-  //         }
-  //       }
-  //     }
-  //   }
-  // }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-} // namespace flash
-} // namespace kernel
-} // namespace cuda
-} // namespace brt
\ No newline at end of file
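The layout helpers deleted above move largely unchanged into external_libs/runtime/flash_attn/lib/utils.h. For readers new to CuTe layout algebra, the acc -> rowcol reshape in convert_layout_acc_rowcol can be checked on the host; the following is a minimal sketch, assuming CUTLASS's CuTe headers and a toy (MMA=4, MMA_M=2, MMA_N=3) accumulator shape (the concrete sizes are illustrative, not taken from the kernels):

    // Illustrative only: run the convert_layout_acc_rowcol reshape on the host.
    #include <cute/tensor.hpp>
    using namespace cute;

    int main() {
      // Toy accumulator layout (MMA=4, MMA_M=2, MMA_N=3), compact column-major.
      auto acc = make_layout(make_shape(Int<4>{}, Int<2>{}, Int<3>{}));
      auto l = logical_divide(acc, Shape<_2>{}); // ((2, 2), MMA_M, MMA_N)
      // Same expression as convert_layout_acc_rowcol above.
      auto rowcol = make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)),
                                make_layout(get<0>(get<0>(l)), get<2>(l)));
      print(rowcol); // nrow = (2, MMA_M) = (2, 2), ncol = (2, MMA_N) = (2, 3)
      return 0;
    }
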
diff --git a/runtime/lib/core/framework/op_accessor.cc b/runtime/lib/core/framework/op_accessor.cc
index 975f0982e..ec4e46213 100644
--- a/runtime/lib/core/framework/op_accessor.cc
+++ b/runtime/lib/core/framework/op_accessor.cc
@@ -196,6 +196,38 @@ std::vector<T> OpAccessor::GetAttrAsVector(const std::string &name) const {
   BRT_THROW("Attribute " + name + " is not supported to get as vector");
 }
 
+void *OpAccessor::GetAttrAsVoidPtr(const std::string &name) const {
+  if (auto attr = info_.GetOperation()->getAttrOfType<ArrayAttr>(name)) {
+    size_t totalSize = 0;
+    for (Attribute elementAttr : attr) {
+      if (auto floatAttr = dyn_cast<FloatAttr>(elementAttr)) {
+        totalSize += sizeof(float);
+      } else if (auto intAttr = dyn_cast<IntegerAttr>(elementAttr)) {
+        totalSize += sizeof(int64_t);
+      } else {
+        // TODO: support string
+        BRT_THROW("Not all elements can be converted to void * for attribute " +
+                  name);
+      }
+    }
+    void *result = malloc(totalSize);
+    size_t offset = 0;
+    for (Attribute elementAttr : attr) {
+      if (auto floatAttr = dyn_cast<FloatAttr>(elementAttr)) {
+        float val = static_cast<float>(floatAttr.getValueAsDouble());
+        std::memcpy(static_cast<char *>(result) + offset, &val, sizeof(float));
+        offset += sizeof(float);
+      } else if (auto intAttr = dyn_cast<IntegerAttr>(elementAttr)) {
+        int64_t val = intAttr.getInt();
+        std::memcpy(static_cast<char *>(result) + offset, &val, sizeof(int64_t));
+        offset += sizeof(int64_t);
+      }
+    }
+    return result;
+  }
+  BRT_THROW("Attribute " + name + " is not supported to get as void *");
+}
+
 std::string OpAccessor::GetUID() const {
   auto byre_op = llvm::cast<byre::ByreOp>(info_.GetOperation());
   return ByREHandle::GetOpUID(byre_op);
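GetAttrAsVoidPtr packs an array attribute into a heap buffer, interleaving 4-byte floats and 8-byte int64s in attribute order, and ownership of the malloc'd buffer passes to the caller. A consumer therefore has to walk the buffer with the same schema it was built from. A minimal sketch of such a walk (the reader type and all names here are hypothetical, not part of this patch):

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Hypothetical cursor over a buffer produced by GetAttrAsVoidPtr; the
    // caller must know the element kinds in order, which holds for
    // byre.custom extra_args since the callee fixes the schema.
    struct ExtraArgsReader {
      const char *cur;
      int64_t NextI64() {
        int64_t v;
        std::memcpy(&v, cur, sizeof(v));
        cur += sizeof(v);
        return v;
      }
      float NextF32() {
        float v;
        std::memcpy(&v, cur, sizeof(v));
        cur += sizeof(v);
        return v;
      }
    };

    // Usage sketch:
    //   void *raw = accessor.GetAttrAsVoidPtr("extra_args");
    //   ExtraArgsReader r{static_cast<const char *>(raw)};
    //   int64_t first = r.NextI64();
    //   ...
    //   free(raw); // the buffer is owned by the caller
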
"test/test_files/flash_attn_kvcache_outputs.data"; +static std::string kvcache_ground_truth_kcache_file = + "test/test_files/flash_attn_kvcache_outputs_kcache.data"; +static std::string kvcache_ground_truth_vcache_file = + "test/test_files/flash_attn_kvcache_outputs_vcache.data"; + +TEST(SM80CUDATestFlashAttnKVCache, Basic) { + size_t b = 2; + size_t seq_len = 128; + size_t seq_len_q = 1; + size_t num_heads = 3; + size_t head_dims = 32; + size_t input_len = b * seq_len_q * num_heads * head_dims; + size_t softmax_len = b * seq_len_q * num_heads; + size_t cache_len = b * seq_len * num_heads * head_dims; + + Session session; + auto status_allocator = CUDAAllocatorFactory(&session); + BRT_TEST_CHECK_STATUS(status_allocator); + auto status_cuda = DefaultCUDAExecutionProviderFactory(&session); + BRT_TEST_CHECK_STATUS(status_cuda); + + auto status_load = session.Load(test_file_flash_attn_kvcache, "byre"); + BRT_TEST_CHECK_STATUS(status_load); + + std::unique_ptr request; + auto status_request = session.NewRequestContext(&request); + BRT_TEST_CHECK_STATUS(status_request); + + __half *d_o; + __half *d_q; + __half *d_k; + __half *d_v; + __half *d_kcache; + __half *d_vcache; + int32_t *d_seqlen; + float *d_softmax_lse; + + cudaMalloc(&d_o, input_len * sizeof(__half)); + cudaMalloc(&d_q, input_len * sizeof(__half)); + cudaMalloc(&d_k, input_len * sizeof(__half)); + cudaMalloc(&d_v, input_len * sizeof(__half)); + cudaMalloc(&d_kcache, cache_len * sizeof(__half)); + cudaMalloc(&d_vcache, cache_len * sizeof(__half)); + cudaMalloc(&d_seqlen, b * sizeof(int32_t)); + cudaMalloc(&d_softmax_lse, softmax_len * sizeof(float)); + + ReadCUDAFloatValues(d_q, input_len, kvcache_input_q_file); + ReadCUDAFloatValues(d_k, input_len, kvcache_input_k_file); + ReadCUDAFloatValues(d_v, input_len, kvcache_input_v_file); + ReadCUDAFloatValues(d_kcache, cache_len, kvcache_input_kcache_file); + ReadCUDAFloatValues(d_vcache, cache_len, kvcache_input_vcache_file); + ReadCUDAIntegerValues(d_seqlen, b, kvcache_input_cache_seqlens_file); + AssignCUDABuffer(d_softmax_lse, softmax_len, 0.f); + AssignCUDABuffer(d_o, input_len, static_cast<__half>(0.f)); + + cudaDeviceSynchronize(); + + // PrintCUDAValues(d_o, input_len, input_len); + // PrintCUDAValues(d_q, input_len, input_len); + // PrintCUDAValues(d_k, input_len, input_len); + // PrintCUDAValues(d_v, input_len, input_len); + // PrintCUDAValues(d_softmax_lse, softmax_len, 10); + + request->BindArg(0, d_q); + request->BindArg(1, d_kcache); + request->BindArg(2, d_vcache); + request->BindArg(3, d_k); + request->BindArg(4, d_v); + request->BindArg(5, d_seqlen); + request->BindArg(6, d_o); + request->BindArg(7, d_softmax_lse); + + request->FinishIOBinding(); + + auto status_run = session.Run(*request); + BRT_TEST_CHECK_STATUS(status_run); + auto status_sync = request->Sync(); + BRT_TEST_CHECK_STATUS(status_sync); + + // PrintCUDAValues(d_o, input_len, input_len); + + CheckCUDABuffer<__half>( + (__half *)d_o, /* size */ input_len, [&](__half *h_ptr) { + __half *ground_truth = new __half[input_len]; + std::ifstream inFile; + inFile.open(kvcache_ground_truth_file); + if (inFile.is_open()) { + float num; + for (size_t i = 0; i < input_len; i++) { + inFile >> num; + // std::cout << "ground_truth[" << i << "] = " << num << std::endl; + ground_truth[i] = static_cast<__half>(num); + } + } else { + ASSERT_TRUE(false) + << "cannot open ground truth file of flash attn fwd output."; + } + inFile.close(); + float max_diff = 0.f; + for (size_t i = 0; i < input_len; ++i) { + if (abs(h_ptr[i] - 
+  CheckCUDABuffer<__half>(
+      (__half *)d_o, /* size */ input_len, [&](__half *h_ptr) {
+        __half *ground_truth = new __half[input_len];
+        std::ifstream inFile;
+        inFile.open(kvcache_ground_truth_file);
+        if (inFile.is_open()) {
+          float num;
+          for (size_t i = 0; i < input_len; i++) {
+            inFile >> num;
+            // std::cout << "ground_truth[" << i << "] = " << num << std::endl;
+            ground_truth[i] = static_cast<__half>(num);
+          }
+        } else {
+          ASSERT_TRUE(false)
+              << "cannot open ground truth file of flash attn kvcache output.";
+        }
+        inFile.close();
+        float max_diff = 0.f;
+        for (size_t i = 0; i < input_len; ++i) {
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) >
+              max_diff) {
+            max_diff = abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]);
+          }
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) > 2e-6f) {
+            std::cout << i << " " << h_ptr[i] << " " << ground_truth[i] << " "
+                      << abs(h_ptr[i] - ground_truth[i]) / ground_truth[i]
+                      << std::endl;
+            EXPECT_TRUE(false);
+          }
+        }
+        std::cout << "max_diff (ratio):" << max_diff << std::endl;
+      });
+
+  // check kvcache update
+  CheckCUDABuffer<__half>(
+      (__half *)d_kcache, /* size */ cache_len, [&](__half *h_ptr) {
+        __half *ground_truth = new __half[cache_len];
+        std::ifstream inFile;
+        inFile.open(kvcache_ground_truth_kcache_file);
+        if (inFile.is_open()) {
+          float num;
+          for (size_t i = 0; i < cache_len; i++) {
+            inFile >> num;
+            // std::cout << "ground_truth[" << i << "] = " << num << std::endl;
+            ground_truth[i] = static_cast<__half>(num);
+          }
+        } else {
+          ASSERT_TRUE(false)
+              << "cannot open ground truth file of flash attn kvcache kcache.";
+        }
+        inFile.close();
+        float max_diff = 0.f;
+        for (size_t i = 0; i < cache_len; ++i) {
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) >
+              max_diff) {
+            max_diff = abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]);
+          }
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) > 2e-6f) {
+            std::cout << i << " " << h_ptr[i] << " " << ground_truth[i] << " "
+                      << abs(h_ptr[i] - ground_truth[i]) / ground_truth[i]
+                      << std::endl;
+            EXPECT_TRUE(false);
+          }
+        }
+        std::cout << "max_diff (ratio):" << max_diff << std::endl;
+      });
+
+  CheckCUDABuffer<__half>(
+      (__half *)d_vcache, /* size */ cache_len, [&](__half *h_ptr) {
+        __half *ground_truth = new __half[cache_len];
+        std::ifstream inFile;
+        inFile.open(kvcache_ground_truth_vcache_file);
+        if (inFile.is_open()) {
+          float num;
+          for (size_t i = 0; i < cache_len; i++) {
+            inFile >> num;
+            // std::cout << "ground_truth[" << i << "] = " << num << std::endl;
+            ground_truth[i] = static_cast<__half>(num);
+          }
+        } else {
+          ASSERT_TRUE(false)
+              << "cannot open ground truth file of flash attn kvcache vcache.";
+        }
+        inFile.close();
+        float max_diff = 0.f;
+        for (size_t i = 0; i < cache_len; ++i) {
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) >
+              max_diff) {
+            max_diff = abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]);
+          }
+          if (abs(h_ptr[i] - ground_truth[i]) / abs(ground_truth[i]) > 2e-6f) {
+            std::cout << i << " " << h_ptr[i] << " " << ground_truth[i] << " "
+                      << abs(h_ptr[i] - ground_truth[i]) / ground_truth[i]
+                      << std::endl;
+            EXPECT_TRUE(false);
+          }
+        }
+        std::cout << "max_diff (ratio):" << max_diff << std::endl;
+      });
+}
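The three ground-truth comparisons in the test above differ only in buffer, length, and file name; if more outputs are added, a shared checker would keep the 2e-6f tolerance and error reporting in one place, and would also release the ground-truth storage, which the inline blocks never delete[]. A possible shape, written against the helpers the test already uses (the function itself is a sketch, not part of this patch, and assumes <vector>, <fstream>, <cmath>, and <algorithm> are available in the test translation unit):

    // Sketch: CheckCUDABuffer and the 2e-6f relative tolerance come from the
    // test above; only the wrapper is new.
    static void CheckAgainstGroundTruth(const __half *d_buf, size_t len,
                                        const std::string &file) {
      CheckCUDABuffer<__half>((__half *)d_buf, len, [&](__half *h_ptr) {
        std::vector<float> ground_truth(len);
        std::ifstream inFile(file);
        ASSERT_TRUE(inFile.is_open()) << "cannot open ground truth file " << file;
        for (size_t i = 0; i < len; i++)
          inFile >> ground_truth[i];
        float max_diff = 0.f;
        for (size_t i = 0; i < len; ++i) {
          float diff = std::abs(static_cast<float>(h_ptr[i]) - ground_truth[i]) /
                       std::abs(ground_truth[i]);
          max_diff = std::max(max_diff, diff);
          EXPECT_LE(diff, 2e-6f) << i << " " << static_cast<float>(h_ptr[i])
                                 << " " << ground_truth[i];
        }
        std::cout << "max_diff (ratio):" << max_diff << std::endl;
      });
    }
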
diff --git a/runtime/test/test_files/flash_attn_bwd.mlir b/runtime/test/test_files/flash_attn_bwd.mlir
index d8ad4d90a..484a54af4 100644
--- a/runtime/test/test_files/flash_attn_bwd.mlir
+++ b/runtime/test/test_files/flash_attn_bwd.mlir
@@ -12,8 +12,8 @@ module attributes {byre.container_module} {
                    %arg10 : memref<1x3x128xf32, "cuda"> {byre.argname = "d_SoftmaxLse", byre.argtype = 2: i32},
                    %arg11 : memref<1x3x128x32xf32, "cuda"> {byre.argname = "d_Q_accum", byre.argtype = 2: i32},
                    %arg12 : memref<1x3x128x128xf32, "cuda"> {byre.argname = "SoftmaxPtr", byre.argtype = 2: i32}) attributes {byre.entry_point} {
-    byre.compute @byteir.flash_attn_fwd(%arg1, %arg2, %arg3, %arg9, %arg4, %arg5, %arg12) {causal = true, dropout_p = 0.000000e+00 : f32, return_softmax = false, softmax_scale = 0.500000e+00 : f32} : memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">
-    byre.compute @byteir.flash_attn_bwd(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg9, %arg6, %arg7, %arg8, %arg10, %arg11) {causal = true, dropout_p = 0.000000e+00 : f32, softmax_scale = 0.500000e+00 : f32} : memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x32xf32, "cuda">
+    "byre.custom"(%arg1, %arg2, %arg3, %arg4, %arg5, %arg12, %arg9) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">, memref<2xi64, "cuda">) -> ()
+    "byre.custom"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg9, %arg6, %arg7, %arg8, %arg10, %arg11) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_bwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x32xf32, "cuda">) -> ()
     return
   }
-}
\ No newline at end of file
+}
diff --git a/runtime/test/test_files/flash_attn_fwd.mlir b/runtime/test/test_files/flash_attn_fwd.mlir
index 358b551f9..e505fa621 100644
--- a/runtime/test/test_files/flash_attn_fwd.mlir
+++ b/runtime/test/test_files/flash_attn_fwd.mlir
@@ -6,7 +6,7 @@ module attributes {byre.container_module} {
                    %arg4 : memref<1x3x128xf32, "cuda"> {byre.argname = "SoftmaxLse", byre.argtype = 2: i32},
                    %arg5 : memref<1x3x128x128xf32, "cuda"> {byre.argname = "SoftmaxPtr", byre.argtype = 2: i32},
                    %arg6 : memref<2xi64, "cuda"> {byre.argname = "RngState", byre.argtype = 2: i32}) attributes {byre.entry_point} {
-    byre.compute @byteir.flash_attn_fwd(%arg0, %arg1, %arg2, %arg6, %arg3, %arg4, %arg5) {causal = true, dropout_p = 0.000000e+00 : f32, return_softmax = false, softmax_scale = 0.500000e+00 : f32} : memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">
+    "byre.custom"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">, memref<2xi64, "cuda">) -> ()
     return
   }
 }
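For reference, the extra_args lists in these files appear to follow the flash attention parameter order: per-tensor batch/row/head strides for Q, K, V, and O, then batch size, head counts, head dim (and its rounded value), softmax_scale, sequence lengths (and their rounded values), dropout probability, and the attention window. The authoritative schema is whatever run_flash_attn_fwd in libflash_attn.so consumes; the decode below is an inferred sketch built on the hypothetical ExtraArgsReader from the op_accessor.cc note above (e.g. 12288 = 128*3*32 and 96 = 3*32 for the 1x128x3x32 tensors), not a documented ABI:

    // Inferred decode of the fwd extra_args; labels are guesses from the values.
    void DecodeFwdExtraArgs(const void *raw) {
      ExtraArgsReader r{static_cast<const char *>(raw)};
      int64_t q_batch_stride = r.NextI64(), k_batch_stride = r.NextI64(),
              v_batch_stride = r.NextI64(), o_batch_stride = r.NextI64();
      int64_t q_row_stride = r.NextI64(), k_row_stride = r.NextI64(),
              v_row_stride = r.NextI64(), o_row_stride = r.NextI64();
      int64_t q_head_stride = r.NextI64(), k_head_stride = r.NextI64(),
              v_head_stride = r.NextI64(), o_head_stride = r.NextI64();
      int64_t batch = r.NextI64(), num_heads = r.NextI64(),
              num_heads_k = r.NextI64();
      int64_t head_dim = r.NextI64(), head_dim_rounded = r.NextI64();
      float softmax_scale = r.NextF32();
      int64_t seqlen_q = r.NextI64(), seqlen_k = r.NextI64(),
              seqlen_q_rounded = r.NextI64(), seqlen_k_rounded = r.NextI64();
      float p_dropout = r.NextF32();
      int64_t window_left = r.NextI64(), window_right = r.NextI64();
      // 25 scalars in total, matching the 25-element extra_args arrays above.
    }
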
diff --git a/runtime/test/test_files/flash_attn_kvcache.mlir b/runtime/test/test_files/flash_attn_kvcache.mlir
new file mode 100644
index 000000000..fa5e5b243
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache.mlir
@@ -0,0 +1,13 @@
+module attributes {byre.container_module} {
+  func.func @test_flash_attn_kvcache(%arg0 : memref<2x1x3x32xf16, "cuda"> {byre.argname = "Q", byre.argtype = 2: i32},
+                                     %arg1 : memref<2x128x3x32xf16, "cuda"> {byre.argname = "KCache", byre.argtype = 2: i32},
+                                     %arg2 : memref<2x128x3x32xf16, "cuda"> {byre.argname = "VCache", byre.argtype = 2: i32},
+                                     %arg3 : memref<2x1x3x32xf16, "cuda"> {byre.argname = "K", byre.argtype = 2: i32},
+                                     %arg4 : memref<2x1x3x32xf16, "cuda"> {byre.argname = "V", byre.argtype = 2: i32},
+                                     %arg5: memref<2xi32, "cuda"> {byre.argname = "SeqLenK", byre.argtype = 2: i32},
+                                     %arg6: memref<2x1x3x32xf16, "cuda"> {byre.argname = "Output", byre.argtype = 2: i32},
+                                     %arg7 : memref<2x3x1xf32, "cuda"> {byre.argname = "SoftmaxLse", byre.argtype = 2: i32}) attributes {byre.entry_point} {
+    "byre.custom"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_kvcache", extra_args = [96 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 2 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 1 : i64, 0.5 : f32, 1 : i64, 128 : i64, 128 : i64, 128 : i64, -1 : i64, -1 : i64]} : (memref<2x1x3x32xf16, "cuda">, memref<2x128x3x32xf16, "cuda">, memref<2x128x3x32xf16, "cuda">, memref<2x1x3x32xf16, "cuda">, memref<2x1x3x32xf16, "cuda">, memref<2xi32, "cuda">, memref<2x1x3x32xf16, "cuda">, memref<2x3x1xf32, "cuda">) -> ()
+    return
+  }
+}
diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_cache_seqlens.data b/runtime/test/test_files/flash_attn_kvcache_inputs_cache_seqlens.data
new file mode 100644
index 000000000..be06f1028
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_inputs_cache_seqlens.data
@@ -0,0 +1 @@
+64 64
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_k.data b/runtime/test/test_files/flash_attn_kvcache_inputs_k.data
new file mode 100644
index 000000000..73566e1c1
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_inputs_k.data
@@ -0,0 +1 @@
+0.0 2.4974346160888672e-05 5.0008296966552734e-05 7.49826431274414e-05 0.00010001659393310547 0.00012505054473876953 0.0001499652862548828 0.00017499923706054688 0.00020003318786621094 0.00022494792938232422 0.00025010108947753906 0.00027489662170410156 0.0002999305725097656 0.0003249645233154297 0.00034999847412109375 0.0003750324249267578 0.0004000663757324219 0.00042510032653808594 0.00044989585876464844 0.0004749298095703125 0.0005002021789550781 0.0005249977111816406 0.0005497932434082031 0.0005750656127929688 0.0005998611450195312 0.0006251335144042969 0.0006499290466308594 0.000675201416015625 0.0006999969482421875 0.00072479248046875 0.0007500648498535156 0.0007748603820800781
0.0008001327514648438 0.0008249282836914062 0.0008502006530761719 0.0008749961853027344 0.0008997917175292969 0.0009250640869140625 0.000949859619140625 0.0009751319885253906 0.0010004043579101562 0.0010251998901367188 0.0010499954223632812 0.0010747909545898438 0.0010995864868164062 0.001125335693359375 0.0011501312255859375 0.0011749267578125 0.0011997222900390625 0.0012254714965820312 0.0012502670288085938 0.0012750625610351562 0.0012998580932617188 0.0013246536254882812 0.00135040283203125 0.0013751983642578125 0.001399993896484375 0.0014247894287109375 0.0014495849609375 0.0014753341674804688 0.0015001296997070312 0.0015249252319335938 0.0015497207641601562 0.001575469970703125 0.0016002655029296875 0.00162506103515625 0.0016498565673828125 0.001674652099609375 0.0017004013061523438 0.0017251968383789062 0.0017499923706054688 0.0017747879028320312 0.0017995834350585938 0.0018253326416015625 0.001850128173828125 0.0018749237060546875 0.00189971923828125 0.0019254684448242188 0.0019502639770507812 0.0019741058349609375 0.0020008087158203125 0.002025604248046875 0.0020503997802734375 0.0020751953125 0.0020999908447265625 0.002124786376953125 0.0021495819091796875 0.00217437744140625 0.0021991729736328125 0.0022258758544921875 0.00225067138671875 0.0022754669189453125 0.002300262451171875 0.0023250579833984375 0.002349853515625 0.0023746490478515625 0.002399444580078125 0.0024242401123046875 0.0024509429931640625 0.002475738525390625 0.0025005340576171875 0.00252532958984375 0.0025501251220703125 0.002574920654296875 0.0025997161865234375 0.00262451171875 0.0026493072509765625 0.002674102783203125 0.0027008056640625 0.0027256011962890625 0.002750396728515625 0.0027751922607421875 0.00279998779296875 0.0028247833251953125 0.002849578857421875 0.0028743743896484375 0.002899169921875 0.002925872802734375 0.0029506683349609375 0.0029754638671875 0.0030002593994140625 0.003025054931640625 0.0030498504638671875 0.00307464599609375 0.0030994415283203125 0.003124237060546875 0.00315093994140625 0.0031757354736328125 0.003200531005859375 0.0032253265380859375 0.0032501220703125 0.0032749176025390625 0.003299713134765625 0.0033245086669921875 0.00334930419921875 0.0033740997314453125 0.0034008026123046875 0.00342559814453125 0.0034503936767578125 0.003475189208984375 0.0034999847412109375 0.0035247802734375 0.0035495758056640625 0.003574371337890625 0.0035991668701171875 0.0036258697509765625 0.003650665283203125 0.0036754608154296875 0.00370025634765625 0.0037250518798828125 0.003749847412109375 0.0037746429443359375 0.0037994384765625 0.0038242340087890625 0.0038509368896484375 0.003875732421875 0.0039005279541015625 0.003925323486328125 0.003948211669921875 0.00397491455078125 0.004001617431640625 0.004024505615234375 0.00405120849609375 0.0040740966796875 0.004100799560546875 0.004123687744140625 0.004150390625 0.00417327880859375 0.004199981689453125 0.0042266845703125 0.00424957275390625 0.004276275634765625 0.004299163818359375 0.00432586669921875 0.0043487548828125 0.004375457763671875 0.004398345947265625 0.004425048828125 0.004451751708984375 0.004474639892578125 0.0045013427734375 0.00452423095703125 0.004550933837890625 0.004573822021484375 0.00460052490234375 0.0046234130859375 0.004650115966796875 0.00467681884765625 0.00469970703125 0.004726409912109375 0.004749298095703125 0.0047760009765625 \ No newline at end of file diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_kcache.data b/runtime/test/test_files/flash_attn_kvcache_inputs_kcache.data new file mode 100644 index 
000000000..8abb42b17 --- /dev/null +++ b/runtime/test/test_files/flash_attn_kvcache_inputs_kcache.data @@ -0,0 +1 @@ +0.0 2.4974346160888672e-05 5.0008296966552734e-05 7.49826431274414e-05 0.00010001659393310547 0.00012505054473876953 0.0001499652862548828 0.00017499923706054688 0.00020003318786621094 0.00022494792938232422 0.00025010108947753906 0.00027489662170410156 0.0002999305725097656 0.0003249645233154297 0.00034999847412109375 0.0003750324249267578 0.0004000663757324219 0.00042510032653808594 0.00044989585876464844 0.0004749298095703125 0.0005002021789550781 0.0005249977111816406 0.0005497932434082031 0.0005750656127929688 0.0005998611450195312 0.0006251335144042969 0.0006499290466308594 0.000675201416015625 0.0006999969482421875 0.00072479248046875 0.0007500648498535156 0.0007748603820800781 0.0008001327514648438 0.0008249282836914062 0.0008502006530761719 0.0008749961853027344 0.0008997917175292969 0.0009250640869140625 0.000949859619140625 0.0009751319885253906 0.0010004043579101562 0.0010251998901367188 0.0010499954223632812 0.0010747909545898438 0.0010995864868164062 0.001125335693359375 0.0011501312255859375 0.0011749267578125 0.0011997222900390625 0.0012254714965820312 0.0012502670288085938 0.0012750625610351562 0.0012998580932617188 0.0013246536254882812 0.00135040283203125 0.0013751983642578125 0.001399993896484375 0.0014247894287109375 0.0014495849609375 0.0014753341674804688 0.0015001296997070312 0.0015249252319335938 0.0015497207641601562 0.001575469970703125 0.0016002655029296875 0.00162506103515625 0.0016498565673828125 0.001674652099609375 0.0017004013061523438 0.0017251968383789062 0.0017499923706054688 0.0017747879028320312 0.0017995834350585938 0.0018253326416015625 0.001850128173828125 0.0018749237060546875 0.00189971923828125 0.0019254684448242188 0.0019502639770507812 0.0019741058349609375 0.0020008087158203125 0.002025604248046875 0.0020503997802734375 0.0020751953125 0.0020999908447265625 0.002124786376953125 0.0021495819091796875 0.00217437744140625 0.0021991729736328125 0.0022258758544921875 0.00225067138671875 0.0022754669189453125 0.002300262451171875 0.0023250579833984375 0.002349853515625 0.0023746490478515625 0.002399444580078125 0.0024242401123046875 0.0024509429931640625 0.002475738525390625 0.0025005340576171875 0.00252532958984375 0.0025501251220703125 0.002574920654296875 0.0025997161865234375 0.00262451171875 0.0026493072509765625 0.002674102783203125 0.0027008056640625 0.0027256011962890625 0.002750396728515625 0.0027751922607421875 0.00279998779296875 0.0028247833251953125 0.002849578857421875 0.0028743743896484375 0.002899169921875 0.002925872802734375 0.0029506683349609375 0.0029754638671875 0.0030002593994140625 0.003025054931640625 0.0030498504638671875 0.00307464599609375 0.0030994415283203125 0.003124237060546875 0.00315093994140625 0.0031757354736328125 0.003200531005859375 0.0032253265380859375 0.0032501220703125 0.0032749176025390625 0.003299713134765625 0.0033245086669921875 0.00334930419921875 0.0033740997314453125 0.0034008026123046875 0.00342559814453125 0.0034503936767578125 0.003475189208984375 0.0034999847412109375 0.0035247802734375 0.0035495758056640625 0.003574371337890625 0.0035991668701171875 0.0036258697509765625 0.003650665283203125 0.0036754608154296875 0.00370025634765625 0.0037250518798828125 0.003749847412109375 0.0037746429443359375 0.0037994384765625 0.0038242340087890625 0.0038509368896484375 0.003875732421875 0.0039005279541015625 0.003925323486328125 0.003948211669921875 0.00397491455078125 0.004001617431640625 
0.004024505615234375 0.00405120849609375 0.0040740966796875 0.004100799560546875 0.004123687744140625 0.004150390625 0.00417327880859375 0.004199981689453125 0.0042266845703125 0.00424957275390625 0.004276275634765625 0.004299163818359375 0.00432586669921875 0.0043487548828125 0.004375457763671875 0.004398345947265625 0.004425048828125 0.004451751708984375 0.004474639892578125 0.0045013427734375 0.00452423095703125 0.004550933837890625 0.004573822021484375 0.00460052490234375 0.0046234130859375 0.004650115966796875 0.00467681884765625 0.00469970703125 0.004726409912109375 0.004749298095703125 0.0047760009765625 0.00479888916015625 0.004825592041015625 0.004848480224609375 0.00487518310546875 0.004901885986328125 0.004924774169921875 0.00495147705078125 0.004974365234375 0.005001068115234375 0.005023956298828125 0.0050506591796875 0.00507354736328125 0.005100250244140625 0.005123138427734375 0.00514984130859375 0.005176544189453125 0.005199432373046875 0.00522613525390625 0.0052490234375 0.005275726318359375 0.005298614501953125 0.0053253173828125 0.00534820556640625 0.005374908447265625 0.005401611328125 0.00542449951171875 0.005451202392578125 0.005474090576171875 0.00550079345703125 0.005523681640625 0.005550384521484375 0.005573272705078125 0.0055999755859375 0.005626678466796875 0.005649566650390625 0.00567626953125 0.00569915771484375 0.005725860595703125 0.005748748779296875 0.00577545166015625 0.00579833984375 0.005825042724609375 0.00585174560546875 0.0058746337890625 0.005901336669921875 0.005924224853515625 0.005950927734375 0.00597381591796875 0.006000518798828125 0.006023406982421875 0.00605010986328125 0.006076812744140625 0.006099700927734375 0.00612640380859375 0.0061492919921875 0.006175994873046875 0.006198883056640625 0.0062255859375 0.00624847412109375 0.006275177001953125 0.0063018798828125 0.00632476806640625 0.006351470947265625 0.006374359130859375 0.00640106201171875 0.0064239501953125 0.006450653076171875 0.006473541259765625 0.006500244140625 0.00652313232421875 0.006549835205078125 0.0065765380859375 0.00659942626953125 0.006626129150390625 0.006649017333984375 0.00667572021484375 0.0066986083984375 0.006725311279296875 0.006748199462890625 0.00677490234375 0.006801605224609375 0.006824493408203125 0.0068511962890625 0.00687408447265625 0.006900787353515625 0.006923675537109375 0.00695037841796875 0.0069732666015625 0.006999969482421875 0.00702667236328125 0.007049560546875 0.007076263427734375 0.007099151611328125 0.0071258544921875 0.00714874267578125 0.007175445556640625 0.007198333740234375 0.00722503662109375 0.007251739501953125 0.007274627685546875 0.00730133056640625 0.00732421875 0.007350921630859375 0.007373809814453125 0.0074005126953125 0.00742340087890625 0.007450103759765625 0.007476806640625 0.00749969482421875 0.007526397705078125 0.007549285888671875 0.00757598876953125 0.007598876953125 0.007625579833984375 0.007648468017578125 0.0076751708984375 0.007701873779296875 0.007724761962890625 0.00775146484375 0.00777435302734375 0.007801055908203125 0.0078277587890625 0.00785064697265625 0.00787353515625 0.00789642333984375 0.00792694091796875 0.0079498291015625 0.00797271728515625 0.00800323486328125 0.008026123046875 0.00804901123046875 0.0080718994140625 0.0081024169921875 0.00812530517578125 0.008148193359375 0.0081787109375 0.00820159912109375 0.0082244873046875 0.00824737548828125 0.00827789306640625 0.00830078125 0.00832366943359375 0.0083465576171875 0.0083770751953125 0.00839996337890625 0.0084228515625 0.008453369140625 0.00847625732421875 
0.0084991455078125 0.00852203369140625 0.00855255126953125 0.008575439453125 0.00859832763671875 0.0086212158203125 0.0086517333984375 0.00867462158203125 0.008697509765625 0.00872802734375 0.00875091552734375 0.0087738037109375 0.00879669189453125 0.00882720947265625 0.00885009765625 0.00887298583984375 0.00890350341796875 0.0089263916015625 0.00894927978515625 0.00897216796875 0.009002685546875 0.00902557373046875 0.0090484619140625 0.00907135009765625 0.00910186767578125 0.009124755859375 0.00914764404296875 0.00917816162109375 0.0092010498046875 0.00922393798828125 0.009246826171875 0.00927734375 0.00930023193359375 0.0093231201171875 0.0093536376953125 0.00937652587890625 0.0093994140625 0.00942230224609375 0.00945281982421875 0.0094757080078125 0.00949859619140625 0.009521484375 0.009552001953125 0.00957489013671875 0.0095977783203125 0.0096282958984375 0.00965118408203125 0.009674072265625 0.00969696044921875 0.00972747802734375 0.0097503662109375 0.00977325439453125 0.00980377197265625 0.00982666015625 0.00984954833984375 0.0098724365234375 0.0099029541015625 0.00992584228515625 0.00994873046875 0.00997161865234375 0.01000213623046875 0.0100250244140625 0.01004791259765625 0.01007843017578125 0.010101318359375 0.01012420654296875 0.0101470947265625 0.0101776123046875 0.01020050048828125 0.010223388671875 0.01024627685546875 0.01027679443359375 0.0102996826171875 0.01032257080078125 0.01035308837890625 0.0103759765625 0.01039886474609375 0.0104217529296875 0.0104522705078125 0.01047515869140625 0.010498046875 0.010528564453125 0.01055145263671875 0.0105743408203125 0.01059722900390625 0.01062774658203125 0.010650634765625 0.01067352294921875 0.0106964111328125 0.0107269287109375 0.01074981689453125 0.010772705078125 0.01080322265625 0.01082611083984375 0.0108489990234375 0.01087188720703125 0.01090240478515625 0.01092529296875 0.01094818115234375 0.01097869873046875 0.0110015869140625 0.01102447509765625 0.01104736328125 0.011077880859375 0.01110076904296875 0.0111236572265625 0.01114654541015625 0.01117706298828125 0.011199951171875 0.01122283935546875 0.01125335693359375 0.0112762451171875 0.01129913330078125 0.011322021484375 0.0113525390625 0.01137542724609375 0.0113983154296875 0.01142120361328125 0.01145172119140625 0.011474609375 0.01149749755859375 0.01152801513671875 0.0115509033203125 0.01157379150390625 0.0115966796875 0.011627197265625 0.01165008544921875 0.0116729736328125 0.0117034912109375 0.01172637939453125 0.011749267578125 0.01177215576171875 0.01180267333984375 0.0118255615234375 0.01184844970703125 0.011871337890625 0.01190185546875 0.01192474365234375 0.0119476318359375 0.0119781494140625 0.01200103759765625 0.01202392578125 0.01204681396484375 0.01207733154296875 0.0121002197265625 0.01212310791015625 0.01215362548828125 0.012176513671875 0.01219940185546875 0.0122222900390625 0.0122528076171875 0.01227569580078125 0.012298583984375 0.01232147216796875 0.01235198974609375 0.0123748779296875 0.01239776611328125 0.01242828369140625 0.012451171875 0.01247406005859375 0.0124969482421875 0.0125274658203125 0.01255035400390625 0.0125732421875 0.012603759765625 0.01262664794921875 0.0126495361328125 0.01267242431640625 0.01270294189453125 0.012725830078125 0.01274871826171875 0.0127716064453125 0.0128021240234375 0.01282501220703125 0.012847900390625 0.01287841796875 0.01290130615234375 0.0129241943359375 0.01294708251953125 0.01297760009765625 0.01300048828125 0.01302337646484375 0.0130462646484375 0.0130767822265625 0.01309967041015625 0.01312255859375 
0.013153076171875 0.01317596435546875 0.0131988525390625 0.01322174072265625 0.01325225830078125 0.013275146484375 0.01329803466796875 0.01332855224609375 0.0133514404296875 0.01337432861328125 0.013397216796875 0.013427734375 0.01345062255859375 0.0134735107421875 0.01349639892578125 0.01352691650390625 0.0135498046875 0.01357269287109375 0.01360321044921875 0.0136260986328125 0.01364898681640625 0.013671875 0.013702392578125 0.01372528076171875 0.0137481689453125 0.0137786865234375 0.01380157470703125 0.013824462890625 0.01384735107421875 0.01387786865234375 0.0139007568359375 0.01392364501953125 0.013946533203125 0.01397705078125 0.01399993896484375 0.0140228271484375 0.0140533447265625 0.01407623291015625 0.01409912109375 0.01412200927734375 0.01415252685546875 0.0141754150390625 0.01419830322265625 0.01422119140625 0.014251708984375 0.01427459716796875 0.0142974853515625 0.0143280029296875 0.01435089111328125 0.014373779296875 0.01439666748046875 0.01442718505859375 0.0144500732421875 0.01447296142578125 0.01450347900390625 0.0145263671875 0.01454925537109375 0.0145721435546875 0.0146026611328125 0.01462554931640625 0.0146484375 0.01467132568359375 0.01470184326171875 0.0147247314453125 0.01474761962890625 0.01477813720703125 0.014801025390625 0.01482391357421875 0.0148468017578125 0.0148773193359375 0.01490020751953125 0.014923095703125 0.01495361328125 0.01497650146484375 0.0149993896484375 0.01502227783203125 0.01505279541015625 0.01507568359375 0.01509857177734375 0.0151214599609375 0.0151519775390625 0.01517486572265625 0.01519775390625 0.015228271484375 0.01525115966796875 0.0152740478515625 0.01529693603515625 0.01532745361328125 0.015350341796875 0.01537322998046875 0.01540374755859375 0.0154266357421875 0.01544952392578125 0.015472412109375 0.0155029296875 0.01552581787109375 0.0155487060546875 0.01557159423828125 0.01560211181640625 0.015625 0.015655517578125 0.0156707763671875 0.0157012939453125 0.0157318115234375 0.0157470703125 0.015777587890625 0.0157928466796875 0.0158233642578125 0.0158538818359375 0.015869140625 0.015899658203125 0.01593017578125 0.0159454345703125 0.0159759521484375 0.0160064697265625 0.016021728515625 0.01605224609375 0.0160675048828125 0.0160980224609375 0.0161285400390625 0.016143798828125 0.01617431640625 0.016204833984375 0.0162200927734375 0.0162506103515625 0.0162811279296875 0.01629638671875 0.016326904296875 0.016357421875 0.0163726806640625 0.0164031982421875 0.01641845703125 0.016448974609375 0.0164794921875 0.0164947509765625 0.0165252685546875 0.0165557861328125 0.016571044921875 0.0166015625 0.016632080078125 0.0166473388671875 0.0166778564453125 0.016693115234375 0.0167236328125 0.016754150390625 0.0167694091796875 0.0167999267578125 0.0168304443359375 0.016845703125 0.016876220703125 0.01690673828125 0.0169219970703125 0.0169525146484375 0.0169677734375 0.016998291015625 0.01702880859375 0.0170440673828125 0.0170745849609375 0.0171051025390625 0.017120361328125 0.01715087890625 0.017181396484375 0.0171966552734375 0.0172271728515625 0.017242431640625 0.01727294921875 0.017303466796875 0.0173187255859375 0.0173492431640625 0.0173797607421875 0.01739501953125 0.017425537109375 0.0174560546875 0.0174713134765625 0.0175018310546875 0.0175323486328125 0.017547607421875 0.017578125 0.0175933837890625 0.0176239013671875 0.0176544189453125 0.017669677734375 0.0177001953125 0.017730712890625 0.0177459716796875 0.0177764892578125 0.0178070068359375 0.017822265625 0.017852783203125 0.0178680419921875 0.0178985595703125 0.0179290771484375 
0.0179443359375 0.017974853515625 0.01800537109375 0.0180206298828125 0.0180511474609375 0.0180816650390625 0.018096923828125 0.01812744140625 0.0181427001953125 0.0181732177734375 0.0182037353515625 0.018218994140625 0.01824951171875 0.018280029296875 0.0182952880859375 0.0183258056640625 0.0183563232421875 0.01837158203125 0.018402099609375 0.0184326171875 0.0184478759765625 0.0184783935546875 0.01849365234375 0.018524169921875 0.0185546875 0.0185699462890625 0.0186004638671875 0.0186309814453125 0.018646240234375 0.0186767578125 0.018707275390625 0.0187225341796875 0.0187530517578125 0.018768310546875 0.018798828125 0.018829345703125 0.0188446044921875 0.0188751220703125 0.0189056396484375 0.0189208984375 0.018951416015625 0.01898193359375 0.0189971923828125 0.0190277099609375 0.01904296875 0.019073486328125 0.01910400390625 0.0191192626953125 0.0191497802734375 0.0191802978515625 0.019195556640625 0.01922607421875 0.019256591796875 0.0192718505859375 0.0193023681640625 0.019317626953125 0.01934814453125 0.019378662109375 0.0193939208984375 0.0194244384765625 0.0194549560546875 0.01947021484375 0.019500732421875 0.01953125 0.0195465087890625 0.0195770263671875 0.0196075439453125 0.019622802734375 0.0196533203125 0.0196685791015625 0.0196990966796875 0.0197296142578125 0.019744873046875 0.019775390625 0.019805908203125 0.0198211669921875 0.0198516845703125 0.0198822021484375 0.0198974609375 0.019927978515625 0.0199432373046875 0.0199737548828125 0.0200042724609375 0.02001953125 0.020050048828125 0.02008056640625 0.0200958251953125 0.0201263427734375 0.0201568603515625 0.020172119140625 0.02020263671875 0.0202178955078125 0.0202484130859375 0.0202789306640625 0.020294189453125 0.02032470703125 0.020355224609375 0.0203704833984375 0.0204010009765625 0.0204315185546875 0.02044677734375 0.020477294921875 0.0204925537109375 0.0205230712890625 0.0205535888671875 0.02056884765625 0.020599365234375 0.0206298828125 0.0206451416015625 0.0206756591796875 0.0207061767578125 0.020721435546875 0.020751953125 0.020782470703125 0.0207977294921875 0.0208282470703125 0.020843505859375 0.0208740234375 0.020904541015625 0.0209197998046875 0.0209503173828125 0.0209808349609375 0.02099609375 0.021026611328125 0.02105712890625 0.0210723876953125 0.0211029052734375 0.0211181640625 0.021148681640625 0.02117919921875 0.0211944580078125 0.0212249755859375 0.0212554931640625 0.021270751953125 0.02130126953125 0.021331787109375 0.0213470458984375 0.0213775634765625 0.021392822265625 0.02142333984375 0.021453857421875 0.0214691162109375 0.0214996337890625 0.0215301513671875 0.02154541015625 0.021575927734375 0.0216064453125 0.0216217041015625 0.0216522216796875 0.02166748046875 0.021697998046875 0.021728515625 0.0217437744140625 0.0217742919921875 0.0218048095703125 0.021820068359375 0.0218505859375 0.021881103515625 0.0218963623046875 0.0219268798828125 0.0219573974609375 0.02197265625 0.022003173828125 0.0220184326171875 0.0220489501953125 0.0220794677734375 0.0220947265625 0.022125244140625 0.02215576171875 0.0221710205078125 0.0222015380859375 0.0222320556640625 0.022247314453125 0.02227783203125 0.0222930908203125 0.0223236083984375 0.0223541259765625 0.022369384765625 0.02239990234375 0.022430419921875 0.0224456787109375 0.0224761962890625 0.0225067138671875 0.02252197265625 0.022552490234375 0.0225677490234375 0.0225982666015625 0.0226287841796875 0.02264404296875 0.022674560546875 0.022705078125 0.0227203369140625 0.0227508544921875 0.0227813720703125 0.022796630859375 0.0228271484375 0.0228424072265625 
0.0228729248046875 0.0229034423828125 0.022918701171875 0.02294921875 0.022979736328125 0.0229949951171875 0.0230255126953125 0.0230560302734375 0.0230712890625 0.023101806640625 0.02313232421875 0.0231475830078125 0.0231781005859375 0.023193359375 0.023223876953125 0.02325439453125 0.0232696533203125 0.0233001708984375 0.0233306884765625 0.023345947265625 0.02337646484375 0.023406982421875 0.0234222412109375 0.0234527587890625 0.023468017578125 0.02349853515625 0.023529052734375 0.0235443115234375 0.0235748291015625 0.0236053466796875 0.02362060546875 0.023651123046875 0.023681640625 0.0236968994140625 0.0237274169921875 0.02374267578125 0.023773193359375 0.0238037109375 0.0238189697265625 0.0238494873046875 0.0238800048828125 0.023895263671875 0.02392578125 0.023956298828125 0.0239715576171875 0.0240020751953125 0.0240325927734375 0.0240478515625 0.024078369140625 0.0240936279296875 0.0241241455078125 0.0241546630859375 0.024169921875 0.024200439453125 0.02423095703125 0.0242462158203125 0.0242767333984375 0.0243072509765625 0.024322509765625 0.02435302734375 0.0243682861328125 0.0243988037109375 0.0244293212890625 0.024444580078125 0.02447509765625 0.024505615234375 0.0245208740234375 0.0245513916015625 0.0245819091796875 0.02459716796875 0.024627685546875 0.0246429443359375 0.0246734619140625 0.0247039794921875 0.02471923828125 0.024749755859375 0.0247802734375 0.0247955322265625 0.0248260498046875 0.0248565673828125 0.024871826171875 0.02490234375 0.0249176025390625 0.0249481201171875 0.0249786376953125 0.024993896484375 0.0250244140625 0.025054931640625 0.0250701904296875 0.0251007080078125 0.0251312255859375 0.025146484375 0.025177001953125 0.02520751953125 0.0252227783203125 0.0252532958984375 0.0252685546875 0.025299072265625 0.02532958984375 0.0253448486328125 0.0253753662109375 0.0254058837890625 0.025421142578125 0.02545166015625 0.025482177734375 0.0254974365234375 0.0255279541015625 0.025543212890625 0.02557373046875 0.025604248046875 0.0256195068359375 0.0256500244140625 0.0256805419921875 0.02569580078125 0.025726318359375 0.0257568359375 0.0257720947265625 0.0258026123046875 0.02581787109375 0.025848388671875 0.02587890625 0.0258941650390625 0.0259246826171875 0.0259552001953125 0.025970458984375 0.0260009765625 0.026031494140625 0.0260467529296875 0.0260772705078125 0.026092529296875 0.026123046875 0.026153564453125 0.0261688232421875 0.0261993408203125 0.0262298583984375 0.0262451171875 0.026275634765625 0.02630615234375 0.0263214111328125 0.0263519287109375 0.0263824462890625 0.026397705078125 0.02642822265625 0.0264434814453125 0.0264739990234375 0.0265045166015625 0.026519775390625 0.02655029296875 0.026580810546875 0.0265960693359375 0.0266265869140625 0.0266571044921875 0.02667236328125 0.026702880859375 0.0267181396484375 0.0267486572265625 0.0267791748046875 0.02679443359375 0.026824951171875 0.02685546875 0.0268707275390625 0.0269012451171875 0.0269317626953125 0.026947021484375 0.0269775390625 0.0269927978515625 0.0270233154296875 0.0270538330078125 0.027069091796875 0.027099609375 0.027130126953125 0.0271453857421875 0.0271759033203125 0.0272064208984375 0.0272216796875 0.027252197265625 0.0272674560546875 0.0272979736328125 0.0273284912109375 0.02734375 0.027374267578125 0.02740478515625 0.0274200439453125 0.0274505615234375 0.0274810791015625 0.027496337890625 0.02752685546875 0.027557373046875 0.0275726318359375 0.0276031494140625 0.027618408203125 0.02764892578125 0.027679443359375 0.0276947021484375 0.0277252197265625 0.0277557373046875 0.02777099609375 
0.027801513671875 0.02783203125 0.0278472900390625 0.0278778076171875 0.02789306640625 0.027923583984375 0.0279541015625 0.0279693603515625 0.0279998779296875 0.0280303955078125 0.028045654296875 0.028076171875 0.028106689453125 0.0281219482421875 0.0281524658203125 0.028167724609375 0.0281982421875 0.028228759765625 0.0282440185546875 0.0282745361328125 0.0283050537109375 0.0283203125 0.028350830078125 0.02838134765625 0.0283966064453125 0.0284271240234375 0.0284423828125 0.028472900390625 0.02850341796875 0.0285186767578125 0.0285491943359375 0.0285797119140625 0.028594970703125 0.02862548828125 0.028656005859375 0.0286712646484375 0.0287017822265625 0.0287322998046875 0.02874755859375 0.028778076171875 0.0287933349609375 0.0288238525390625 0.0288543701171875 0.02886962890625 0.028900146484375 0.0289306640625 0.0289459228515625 0.0289764404296875 0.0290069580078125 0.029022216796875 0.029052734375 0.0290679931640625 0.0290985107421875 0.0291290283203125 0.029144287109375 0.0291748046875 0.029205322265625 0.0292205810546875 0.0292510986328125 0.0292816162109375 0.029296875 0.029327392578125 0.0293426513671875 0.0293731689453125 0.0294036865234375 0.0294189453125 0.029449462890625 0.02947998046875 0.0294952392578125 0.0295257568359375 0.0295562744140625 0.029571533203125 0.02960205078125 0.029632568359375 0.0296478271484375 0.0296783447265625 0.029693603515625 0.02972412109375 0.029754638671875 0.0297698974609375 0.0298004150390625 0.0298309326171875 0.02984619140625 0.029876708984375 0.0299072265625 0.0299224853515625 0.0299530029296875 0.02996826171875 0.029998779296875 0.030029296875 0.0300445556640625 0.0300750732421875 0.0301055908203125 0.030120849609375 0.0301513671875 0.030181884765625 0.0301971435546875 0.0302276611328125 0.030242919921875 0.0302734375 0.030303955078125 0.0303192138671875 0.0303497314453125 0.0303802490234375 0.0303955078125 0.030426025390625 0.03045654296875 0.0304718017578125 0.0305023193359375 0.030517578125 0.030548095703125 0.03057861328125 0.0305938720703125 0.0306243896484375 0.0306549072265625 0.030670166015625 0.03070068359375 0.030731201171875 0.0307464599609375 0.0307769775390625 0.0308074951171875 0.03082275390625 0.030853271484375 0.0308685302734375 0.0308990478515625 0.0309295654296875 0.03094482421875 0.030975341796875 0.031005859375 0.0310211181640625 0.0310516357421875 0.0310821533203125 0.031097412109375 0.0311279296875 0.0311431884765625 0.0311737060546875 0.0312042236328125 0.031219482421875 0.03125 0.031280517578125 0.03131103515625 0.03131103515625 0.031341552734375 0.0313720703125 0.031402587890625 0.03143310546875 0.031463623046875 0.031463623046875 0.031494140625 0.031524658203125 0.03155517578125 0.031585693359375 0.031585693359375 0.0316162109375 0.031646728515625 0.03167724609375 0.031707763671875 0.03173828125 0.03173828125 0.031768798828125 0.03179931640625 0.031829833984375 0.0318603515625 0.0318603515625 0.031890869140625 0.03192138671875 0.031951904296875 0.031982421875 0.032012939453125 0.032012939453125 0.03204345703125 0.032073974609375 0.0321044921875 0.032135009765625 0.032135009765625 0.03216552734375 0.032196044921875 0.0322265625 0.032257080078125 0.03228759765625 0.03228759765625 0.032318115234375 0.0323486328125 0.032379150390625 0.03240966796875 0.032440185546875 0.032440185546875 0.032470703125 0.032501220703125 0.03253173828125 0.032562255859375 0.032562255859375 0.0325927734375 0.032623291015625 0.03265380859375 0.032684326171875 0.03271484375 0.03271484375 0.032745361328125 0.03277587890625 0.032806396484375 
0.0328369140625 0.0328369140625 0.032867431640625 0.03289794921875 0.032928466796875 0.032958984375 0.032989501953125 0.032989501953125 0.03302001953125 0.033050537109375 0.0330810546875 0.033111572265625 0.033111572265625 0.03314208984375 0.033172607421875 0.033203125 0.033233642578125 0.03326416015625 0.03326416015625 0.033294677734375 0.0333251953125 0.033355712890625 0.03338623046875 0.03338623046875 0.033416748046875 0.033447265625 0.033477783203125 0.03350830078125 0.033538818359375 0.033538818359375 0.0335693359375 0.033599853515625 0.03363037109375 0.033660888671875 0.033660888671875 0.03369140625 0.033721923828125 0.03375244140625 0.033782958984375 0.0338134765625 0.0338134765625 0.033843994140625 0.03387451171875 0.033905029296875 0.033935546875 0.033935546875 0.033966064453125 0.03399658203125 0.034027099609375 0.0340576171875 0.034088134765625 0.034088134765625 0.03411865234375 0.034149169921875 0.0341796875 0.034210205078125 0.034210205078125 0.03424072265625 0.034271240234375 0.0343017578125 0.034332275390625 0.03436279296875 0.03436279296875 0.034393310546875 0.034423828125 0.034454345703125 0.03448486328125 0.03448486328125 0.034515380859375 0.0345458984375 0.034576416015625 0.03460693359375 0.034637451171875 0.034637451171875 0.03466796875 0.034698486328125 0.03472900390625 0.034759521484375 0.0347900390625 0.0347900390625 0.034820556640625 0.03485107421875 0.034881591796875 0.034912109375 0.034912109375 0.034942626953125 0.03497314453125 0.035003662109375 0.0350341796875 0.035064697265625 0.035064697265625 0.03509521484375 0.035125732421875 0.03515625 0.035186767578125 0.035186767578125 0.03521728515625 0.035247802734375 0.0352783203125 0.035308837890625 0.03533935546875 0.03533935546875 0.035369873046875 0.035400390625 0.035430908203125 0.03546142578125 0.03546142578125 0.035491943359375 0.0355224609375 0.035552978515625 0.03558349609375 0.035614013671875 0.035614013671875 0.03564453125 0.035675048828125 0.03570556640625 0.035736083984375 0.035736083984375 0.0357666015625 0.035797119140625 0.03582763671875 0.035858154296875 0.035888671875 0.035888671875 0.035919189453125 0.03594970703125 0.035980224609375 0.0360107421875 0.0360107421875 0.036041259765625 0.03607177734375 0.036102294921875 0.0361328125 0.036163330078125 0.036163330078125 0.03619384765625 0.036224365234375 0.0362548828125 0.036285400390625 0.036285400390625 0.03631591796875 0.036346435546875 0.036376953125 0.036407470703125 0.03643798828125 0.03643798828125 0.036468505859375 0.0364990234375 0.036529541015625 0.03656005859375 0.03656005859375 0.036590576171875 0.03662109375 0.036651611328125 0.03668212890625 0.036712646484375 0.036712646484375 0.0367431640625 0.036773681640625 0.03680419921875 0.036834716796875 0.036865234375 0.036865234375 0.036895751953125 0.03692626953125 0.036956787109375 0.0369873046875 0.0369873046875 0.037017822265625 0.03704833984375 0.037078857421875 0.037109375 0.037139892578125 0.037139892578125 0.03717041015625 0.037200927734375 0.0372314453125 0.037261962890625 0.037261962890625 0.03729248046875 0.037322998046875 0.037353515625 0.037384033203125 0.03741455078125 0.03741455078125 0.037445068359375 0.0374755859375 0.037506103515625 0.03753662109375 0.03753662109375 0.037567138671875 0.03759765625 0.037628173828125 0.03765869140625 0.037689208984375 0.037689208984375 0.0377197265625 0.037750244140625 0.03778076171875 0.037811279296875 0.037811279296875 0.037841796875 0.037872314453125 0.03790283203125 0.037933349609375 0.0379638671875 0.0379638671875 0.037994384765625 
0.03802490234375 0.038055419921875 0.0380859375 0.0380859375 0.038116455078125 0.03814697265625 0.038177490234375 0.0382080078125 0.038238525390625 0.038238525390625 0.03826904296875 0.038299560546875 0.038330078125 0.038360595703125 0.038360595703125 0.03839111328125 0.038421630859375 0.0384521484375 0.038482666015625 0.03851318359375 0.03851318359375 0.038543701171875 0.03857421875 0.038604736328125 0.03863525390625 0.03863525390625 0.038665771484375 0.0386962890625 0.038726806640625 0.03875732421875 0.038787841796875 0.038787841796875 0.038818359375 0.038848876953125 0.03887939453125 0.038909912109375 0.038909912109375 0.0389404296875 0.038970947265625 0.03900146484375 0.039031982421875 0.0390625 0.0390625 0.039093017578125 0.03912353515625 0.039154052734375 0.0391845703125 0.039215087890625 0.039215087890625 0.03924560546875 0.039276123046875 0.039306640625 0.039337158203125 0.039337158203125 0.03936767578125 0.039398193359375 0.0394287109375 0.039459228515625 0.03948974609375 0.03948974609375 0.039520263671875 0.03955078125 0.039581298828125 0.03961181640625 0.03961181640625 0.039642333984375 0.0396728515625 0.039703369140625 0.03973388671875 0.039764404296875 0.039764404296875 0.039794921875 0.039825439453125 0.03985595703125 0.039886474609375 0.039886474609375 0.0399169921875 0.039947509765625 0.03997802734375 0.040008544921875 0.0400390625 0.0400390625 0.040069580078125 0.04010009765625 0.040130615234375 0.0401611328125 0.0401611328125 0.040191650390625 0.04022216796875 0.040252685546875 0.040283203125 0.040313720703125 0.040313720703125 0.04034423828125 0.040374755859375 0.0404052734375 0.040435791015625 0.040435791015625 0.04046630859375 0.040496826171875 0.04052734375 0.040557861328125 0.04058837890625 0.04058837890625 0.040618896484375 0.0406494140625 0.040679931640625 0.04071044921875 0.04071044921875 0.040740966796875 0.040771484375 0.040802001953125 0.04083251953125 0.040863037109375 0.040863037109375 0.0408935546875 0.040924072265625 0.04095458984375 0.040985107421875 0.040985107421875 0.041015625 0.041046142578125 0.04107666015625 0.041107177734375 0.0411376953125 0.0411376953125 0.041168212890625 0.04119873046875 0.041229248046875 0.041259765625 0.041259765625 0.041290283203125 0.04132080078125 0.041351318359375 0.0413818359375 0.041412353515625 0.041412353515625 0.04144287109375 0.041473388671875 0.04150390625 0.041534423828125 0.04156494140625 0.04156494140625 0.041595458984375 0.0416259765625 0.041656494140625 0.04168701171875 0.04168701171875 0.041717529296875 0.041748046875 0.041778564453125 0.04180908203125 0.041839599609375 0.041839599609375 0.0418701171875 0.041900634765625 0.04193115234375 0.041961669921875 0.041961669921875 0.0419921875 0.042022705078125 0.04205322265625 0.042083740234375 0.0421142578125 0.0421142578125 0.042144775390625 0.04217529296875 0.042205810546875 0.042236328125 0.042236328125 0.042266845703125 0.04229736328125 0.042327880859375 0.0423583984375 0.042388916015625 0.042388916015625 0.04241943359375 0.042449951171875 0.04248046875 0.042510986328125 0.042510986328125 0.04254150390625 0.042572021484375 0.0426025390625 0.042633056640625 0.04266357421875 0.04266357421875 0.042694091796875 0.042724609375 0.042755126953125 0.04278564453125 0.04278564453125 0.042816162109375 0.0428466796875 0.042877197265625 0.04290771484375 0.042938232421875 0.042938232421875 0.04296875 0.042999267578125 0.04302978515625 0.043060302734375 0.043060302734375 0.0430908203125 0.043121337890625 0.04315185546875 0.043182373046875 0.043212890625 0.043212890625 
[elided: raw test-data dump, a single long run of monotonically increasing values from 0.043243408203125 up through ~0.1548, apparently the contents of one of the flash_attn_kvcache_inputs_*.data files added by this patch. Every value in the run is exactly representable in half precision, and neighboring values repeat more often as the magnitude (and hence the fp16 ulp) grows, consistent with a finer-grained linear ramp rounded to fp16.]
0.1549072265625 0.1549072265625 0.1549072265625 0.155029296875 0.155029296875 0.155029296875 0.155029296875 0.155029296875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1552734375 0.1552734375 0.1552734375 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.155517578125 0.155517578125 0.155517578125 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.15576171875 0.15576171875 0.15576171875 0.15576171875 0.15576171875 0.1558837890625 0.1558837890625 0.1558837890625 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.1561279296875 0.1561279296875 0.1561279296875 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.156494140625 0.156494140625 0.156494140625 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.15673828125 0.15673828125 0.15673828125 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.1571044921875 0.1571044921875 0.1571044921875 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.157470703125 0.157470703125 0.157470703125 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.15771484375 0.15771484375 0.15771484375 0.1578369140625 0.1578369140625 0.1578369140625 0.1578369140625 0.1578369140625 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.1580810546875 0.1580810546875 0.1580810546875 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.1583251953125 0.1583251953125 0.1583251953125 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.15869140625 0.15869140625 0.15869140625 0.1588134765625 0.1588134765625 0.1588134765625 0.1588134765625 0.1588134765625 0.158935546875 0.158935546875 0.158935546875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1593017578125 0.1593017578125 0.1593017578125 0.159423828125 0.159423828125 0.159423828125 0.159423828125 0.159423828125 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.15966796875 0.15966796875 0.15966796875 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.159912109375 0.159912109375 0.159912109375 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.1602783203125 0.1602783203125 0.1602783203125 0.160400390625 0.160400390625 0.160400390625 0.160400390625 0.160400390625 0.1605224609375 0.1605224609375 0.1605224609375 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 
0.1607666015625 0.160888671875 0.160888671875 0.160888671875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1611328125 0.1611328125 0.1611328125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.161376953125 0.161376953125 0.161376953125 0.161376953125 0.161376953125 0.1614990234375 0.1614990234375 0.1614990234375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.161865234375 0.161865234375 0.161865234375 0.1619873046875 0.1619873046875 0.1619873046875 0.1619873046875 0.1619873046875 0.162109375 0.162109375 0.162109375 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.1624755859375 0.1624755859375 0.1624755859375 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.1627197265625 0.1627197265625 0.1627197265625 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.1629638671875 0.1629638671875 0.1629638671875 0.1629638671875 0.1629638671875 0.1630859375 0.1630859375 0.1630859375 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.163330078125 0.163330078125 0.163330078125 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.1636962890625 0.1636962890625 0.1636962890625 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1640625 0.1640625 0.1640625 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.164306640625 0.164306640625 0.164306640625 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.1646728515625 0.1646728515625 0.1646728515625 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.165283203125 0.165283203125 0.165283203125 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.16552734375 0.16552734375 0.16552734375 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.1658935546875 0.1658935546875 0.1658935546875 0.166015625 0.166015625 0.166015625 0.166015625 0.166015625 0.1661376953125 0.1661376953125 0.1661376953125 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.16650390625 0.16650390625 0.16650390625 0.1666259765625 0.1666259765625 0.1666259765625 0.1666259765625 0.1666259765625 0.166748046875 0.166748046875 
0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.1668701171875 0.1668701171875 0.1668701171875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1671142578125 0.1671142578125 0.1671142578125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.16748046875 0.16748046875 0.16748046875 0.1676025390625 0.1676025390625 0.1676025390625 0.1676025390625 0.1676025390625 0.167724609375 0.167724609375 0.167724609375 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.1680908203125 0.1680908203125 0.1680908203125 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.1683349609375 0.1683349609375 0.1683349609375 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.168701171875 0.168701171875 0.168701171875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1690673828125 0.1690673828125 0.1690673828125 0.169189453125 0.169189453125 0.169189453125 0.169189453125 0.169189453125 0.1693115234375 0.1693115234375 0.1693115234375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.169677734375 0.169677734375 0.169677734375 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.169921875 0.169921875 0.169921875 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.170166015625 0.170166015625 0.170166015625 0.170166015625 0.170166015625 0.1702880859375 0.1702880859375 0.1702880859375 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.1705322265625 0.1705322265625 0.1705322265625 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1708984375 0.1708984375 0.1708984375 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.1712646484375 0.1712646484375 0.1712646484375 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.1715087890625 0.1715087890625 0.1715087890625 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.171875 0.171875 0.171875 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.172119140625 0.172119140625 0.172119140625 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.1724853515625 0.1724853515625 0.1724853515625 0.172607421875 0.172607421875 0.172607421875 
0.172607421875 0.172607421875 0.1727294921875 0.1727294921875 0.1727294921875 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.173095703125 0.173095703125 0.173095703125 0.1732177734375 0.1732177734375 0.1732177734375 0.1732177734375 0.1732177734375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.1734619140625 0.1734619140625 0.1734619140625 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.1737060546875 0.1737060546875 0.1737060546875 0.173828125 0.173828125 0.173828125 0.173828125 0.173828125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.174072265625 0.174072265625 0.174072265625 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.17431640625 0.17431640625 0.17431640625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.1746826171875 0.1746826171875 0.1746826171875 0.1748046875 0.1748046875 0.1748046875 0.1748046875 0.1748046875 0.1749267578125 0.1749267578125 0.1749267578125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.17529296875 0.17529296875 0.17529296875 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.175537109375 0.175537109375 0.175537109375 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.17578125 0.17578125 0.17578125 0.17578125 0.17578125 0.1759033203125 0.1759033203125 0.1759033203125 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.17626953125 0.17626953125 0.17626953125 0.1763916015625 0.1763916015625 0.1763916015625 0.1763916015625 0.1763916015625 0.176513671875 0.176513671875 0.176513671875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1768798828125 0.1768798828125 0.1768798828125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.1771240234375 0.1771240234375 0.1771240234375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.1773681640625 0.1773681640625 0.1773681640625 0.1773681640625 0.1773681640625 0.177490234375 0.177490234375 0.177490234375 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.177734375 0.177734375 0.177734375 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.1781005859375 0.1781005859375 0.1781005859375 0.17822265625 0.17822265625 0.17822265625 0.17822265625 0.17822265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.178466796875 0.178466796875 
0.178466796875 0.1785888671875 0.1785888671875 0.1785888671875 0.1785888671875 0.1785888671875 0.1787109375 0.1787109375 0.1787109375 0.1788330078125 0.1788330078125 0.1788330078125 0.1788330078125 0.1788330078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.1790771484375 0.1790771484375 0.1790771484375 0.17919921875 0.17919921875 0.17919921875 0.17919921875 0.17919921875 0.1793212890625 0.1793212890625 0.1793212890625 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.1795654296875 0.1795654296875 0.1795654296875 0.1795654296875 0.1795654296875 0.1796875 0.1796875 0.1796875 0.1798095703125 0.1798095703125 0.1798095703125 0.1798095703125 0.1798095703125 0.179931640625 0.179931640625 0.179931640625 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.18017578125 0.18017578125 0.18017578125 0.18017578125 0.18017578125 0.1802978515625 0.1802978515625 0.1802978515625 0.180419921875 0.180419921875 0.180419921875 0.180419921875 0.180419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1806640625 0.1806640625 0.1806640625 0.1807861328125 0.1807861328125 0.1807861328125 0.1807861328125 0.1807861328125 0.180908203125 0.180908203125 0.180908203125 0.1810302734375 0.1810302734375 0.1810302734375 0.1810302734375 0.1810302734375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.1812744140625 0.1812744140625 0.1812744140625 0.181396484375 0.181396484375 0.181396484375 0.181396484375 0.181396484375 0.1815185546875 0.1815185546875 0.1815185546875 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.1817626953125 0.1817626953125 0.1817626953125 0.1817626953125 0.1817626953125 0.181884765625 0.181884765625 0.181884765625 0.1820068359375 0.1820068359375 0.1820068359375 0.1820068359375 0.1820068359375 0.18212890625 0.18212890625 0.18212890625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.182373046875 0.182373046875 0.182373046875 0.182373046875 0.182373046875 0.1824951171875 0.1824951171875 0.1824951171875 0.1826171875 0.1826171875 0.1826171875 0.1826171875 0.1826171875 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.182861328125 0.182861328125 0.182861328125 0.1829833984375 0.1829833984375 0.1829833984375 0.1829833984375 0.1829833984375 0.18310546875 0.18310546875 0.18310546875 0.1832275390625 0.1832275390625 0.1832275390625 0.1832275390625 0.1832275390625 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.1834716796875 0.1834716796875 0.1834716796875 0.18359375 0.18359375 0.18359375 0.18359375 0.18359375 0.1837158203125 0.1837158203125 0.1837158203125 0.183837890625 0.183837890625 0.183837890625 0.183837890625 0.183837890625 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.18408203125 0.18408203125 0.18408203125 0.1842041015625 0.1842041015625 0.1842041015625 0.1842041015625 0.1842041015625 0.184326171875 0.184326171875 0.184326171875 0.1844482421875 0.1844482421875 0.1844482421875 
0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1845703125 0.1845703125 0.1845703125 0.1845703125 0.1845703125 0.1846923828125 0.1846923828125 0.1846923828125 0.184814453125 0.184814453125 0.184814453125 0.184814453125 0.184814453125 0.1849365234375 0.1849365234375 0.1849365234375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.1851806640625 0.1851806640625 0.1851806640625 0.1851806640625 0.1851806640625 0.185302734375 0.185302734375 0.185302734375 0.1854248046875 0.1854248046875 0.1854248046875 0.1854248046875 0.1854248046875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.1856689453125 0.1856689453125 0.1856689453125 0.185791015625 0.185791015625 0.185791015625 0.185791015625 0.185791015625 0.1859130859375 0.1859130859375 0.1859130859375 0.18603515625 0.18603515625 0.18603515625 0.18603515625 0.18603515625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.186279296875 0.186279296875 0.186279296875 0.1864013671875 0.1864013671875 0.1864013671875 0.1864013671875 0.1864013671875 0.1865234375 0.1865234375 0.1865234375 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.186767578125 0.186767578125 0.186767578125 0.186767578125 0.186767578125 0.1868896484375 0.1868896484375 0.1868896484375 0.18701171875 0.18701171875 0.18701171875 0.18701171875 0.18701171875 0.1871337890625 0.1871337890625 0.1871337890625 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.1873779296875 0.1873779296875 0.1873779296875 0.1873779296875 0.1873779296875 0.1875 0.1875 0.1875 0.1876220703125 0.1876220703125 0.1876220703125 0.1876220703125 0.1876220703125 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.1878662109375 0.1878662109375 0.1878662109375 0.18798828125 0.18798828125 0.18798828125 0.18798828125 0.18798828125 0.1881103515625 0.1881103515625 0.1881103515625 0.188232421875 0.188232421875 0.188232421875 0.188232421875 0.188232421875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1884765625 0.1884765625 0.1884765625 0.1885986328125 0.1885986328125 0.1885986328125 0.1885986328125 0.1885986328125 0.188720703125 0.188720703125 0.188720703125 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.18896484375 0.18896484375 0.18896484375 0.18896484375 0.18896484375 0.1890869140625 0.1890869140625 0.1890869140625 0.189208984375 0.189208984375 0.189208984375 0.189208984375 0.189208984375 0.1893310546875 0.1893310546875 0.1893310546875 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.1895751953125 0.1895751953125 0.1895751953125 0.1895751953125 0.1895751953125 0.189697265625 0.189697265625 0.189697265625 0.1898193359375 0.1898193359375 0.1898193359375 0.1898193359375 0.1898193359375 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.1900634765625 0.1900634765625 0.1900634765625 0.190185546875 0.190185546875 0.190185546875 0.190185546875 0.190185546875 0.1903076171875 0.1903076171875 0.1903076171875 0.1904296875 
0.1904296875 0.1904296875 0.1904296875 0.1904296875 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.190673828125 0.190673828125 0.190673828125 0.1907958984375 0.1907958984375 0.1907958984375 0.1907958984375 0.1907958984375 0.19091796875 0.19091796875 0.19091796875 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.191162109375 0.191162109375 0.191162109375 0.191162109375 0.191162109375 0.1912841796875 0.1912841796875 0.1912841796875 0.19140625 0.19140625 0.19140625 0.19140625 0.19140625 0.1915283203125 0.1915283203125 0.1915283203125 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.1917724609375 0.1917724609375 0.1917724609375 0.1917724609375 0.1917724609375 0.19189453125 0.19189453125 0.19189453125 0.1920166015625 0.1920166015625 0.1920166015625 0.1920166015625 0.1920166015625 0.192138671875 0.192138671875 0.192138671875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1923828125 0.1923828125 0.1923828125 0.1923828125 0.1923828125 0.1925048828125 0.1925048828125 0.1925048828125 0.192626953125 0.192626953125 0.192626953125 0.192626953125 0.192626953125 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.19287109375 0.19287109375 0.19287109375 0.1929931640625 0.1929931640625 0.1929931640625 0.1929931640625 0.1929931640625 0.193115234375 0.193115234375 0.193115234375 0.1932373046875 0.1932373046875 0.1932373046875 0.1932373046875 0.1932373046875 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.1934814453125 0.1934814453125 0.1934814453125 0.193603515625 0.193603515625 0.193603515625 0.193603515625 0.193603515625 0.1937255859375 0.1937255859375 0.1937255859375 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.1939697265625 0.1939697265625 0.1939697265625 0.1939697265625 0.1939697265625 0.194091796875 0.194091796875 0.194091796875 0.1942138671875 0.1942138671875 0.1942138671875 0.1942138671875 0.1942138671875 0.1943359375 0.1943359375 0.1943359375 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.194580078125 0.194580078125 0.194580078125 0.194580078125 0.194580078125 0.1947021484375 0.1947021484375 0.1947021484375 0.19482421875 0.19482421875 0.19482421875 0.19482421875 0.19482421875 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.195068359375 0.195068359375 0.195068359375 0.1951904296875 0.1951904296875 0.1951904296875 0.1951904296875 0.1951904296875 0.1953125 0.1953125 0.1953125 0.1954345703125 0.1954345703125 0.1954345703125 0.1954345703125 0.1954345703125 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.1956787109375 0.1956787109375 0.1956787109375 0.19580078125 0.19580078125 0.19580078125 0.19580078125 0.19580078125 0.1959228515625 0.1959228515625 0.1959228515625 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.1961669921875 0.1961669921875 0.1961669921875 0.1961669921875 0.1961669921875 
0.1962890625 0.1962890625 0.1962890625 0.1964111328125 0.1964111328125 0.1964111328125 0.1964111328125 0.1964111328125 0.196533203125 0.196533203125 0.196533203125 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.19677734375 0.19677734375 0.19677734375 0.19677734375 0.19677734375 0.1968994140625 0.1968994140625 0.1968994140625 0.197021484375 0.197021484375 0.197021484375 0.197021484375 0.197021484375 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.197265625 0.197265625 0.197265625 0.1973876953125 0.1973876953125 0.1973876953125 0.1973876953125 0.1973876953125 0.197509765625 0.197509765625 0.197509765625 0.1976318359375 0.1976318359375 0.1976318359375 0.1976318359375 0.1976318359375 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.1978759765625 0.1978759765625 0.1978759765625 0.197998046875 0.197998046875 0.197998046875 0.197998046875 0.197998046875 0.1981201171875 0.1981201171875 0.1981201171875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1983642578125 0.1983642578125 0.1983642578125 0.1983642578125 0.1983642578125 0.198486328125 0.198486328125 0.198486328125 0.1986083984375 0.1986083984375 0.1986083984375 0.1986083984375 0.1986083984375 0.19873046875 0.19873046875 0.19873046875 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.198974609375 0.198974609375 0.198974609375 0.198974609375 0.198974609375 0.1990966796875 0.1990966796875 0.1990966796875 0.19921875 0.19921875 0.19921875 0.19921875 0.19921875 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.199462890625 0.199462890625 0.199462890625 0.1995849609375 0.1995849609375 0.1995849609375 0.1995849609375 0.1995849609375 0.19970703125 0.19970703125 0.19970703125 0.1998291015625 0.1998291015625 0.1998291015625 0.1998291015625 0.1998291015625 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.2000732421875 0.2000732421875 0.2000732421875 0.2001953125 0.2001953125 0.2001953125 0.2001953125 0.2001953125 0.2003173828125 0.2003173828125 0.2003173828125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.2005615234375 0.2005615234375 0.2005615234375 0.2005615234375 0.2005615234375 0.20068359375 0.20068359375 0.20068359375 0.2008056640625 0.2008056640625 0.2008056640625 0.2008056640625 0.2008056640625 0.200927734375 0.200927734375 0.200927734375 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.201171875 0.201171875 0.201171875 0.201171875 0.201171875 0.2012939453125 0.2012939453125 0.2012939453125 0.201416015625 0.201416015625 0.201416015625 0.201416015625 0.201416015625 0.2015380859375 0.2015380859375 0.2015380859375 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.2017822265625 0.2017822265625 0.2017822265625 0.2017822265625 0.2017822265625 0.201904296875 0.201904296875 0.201904296875 0.2020263671875 0.2020263671875 0.2020263671875 0.2020263671875 0.2020263671875 0.2021484375 0.2021484375 0.2021484375 0.2021484375 
0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2022705078125 0.2022705078125 0.2022705078125 0.202392578125 0.202392578125 0.202392578125 0.202392578125 0.202392578125 0.2025146484375 0.2025146484375 0.2025146484375 0.20263671875 0.20263671875 0.20263671875 0.20263671875 0.20263671875 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.202880859375 0.202880859375 0.202880859375 0.2030029296875 0.2030029296875 0.2030029296875 0.2030029296875 0.2030029296875 0.203125 0.203125 0.203125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.203369140625 0.203369140625 0.203369140625 0.203369140625 0.203369140625 0.2034912109375 0.2034912109375 0.2034912109375 0.20361328125 0.20361328125 0.20361328125 0.20361328125 0.20361328125 0.2037353515625 0.2037353515625 0.2037353515625 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.2039794921875 0.2039794921875 0.2039794921875 0.2039794921875 0.2039794921875 0.2041015625 0.2041015625 0.2041015625 0.2042236328125 0.2042236328125 0.2042236328125 0.2042236328125 0.2042236328125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.2044677734375 0.2044677734375 0.2044677734375 0.20458984375 0.20458984375 0.20458984375 0.20458984375 0.20458984375 0.2047119140625 0.2047119140625 0.2047119140625 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 
0.2080078125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.2137451171875 0.2137451171875 0.2137451171875 0.2137451171875 0.2137451171875 0.2137451171875 0.2137451171875 0.2139892578125 
0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2198486328125 0.2198486328125 0.2198486328125 0.2198486328125 0.2198486328125 0.2198486328125 0.2198486328125 
0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.225830078125 0.225830078125 0.225830078125 0.225830078125 0.225830078125 
0.225830078125 0.225830078125 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 
[data payload elided: several thousand whitespace-separated half-precision (fp16) float literals, monotonically non-decreasing from 0.2318115234375 to 0.357177734375, with each distinct value repeated in short runs]
0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.363525390625 0.363525390625 0.363525390625 0.363525390625 
0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 
0.369873046875 0.369873046875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 
0.376220703125 0.376220703125 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.382568359375 
0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 
0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.395263671875 0.395263671875 0.395263671875 0.395263671875 
0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.401611328125 0.401611328125 
0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.40625 0.40625 0.40625 0.40625 0.40625 0.40625 0.40625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 
0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 
0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.4208984375 
0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 
0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.43310546875 0.43310546875 0.43310546875 
0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 
0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.446044921875 0.446044921875 0.446044921875 
0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.452392578125 0.452392578125 
[truncated test data: a long run of monotonically increasing half-precision values from 0.452392578125 to 0.5849609375, each value repeated ~15–17 times in step 2^-11; full contents elided]
0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 
0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 
0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 
0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 
0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_q.data b/runtime/test/test_files/flash_attn_kvcache_inputs_q.data
new file mode 100644
index 000000000..0298445da
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_inputs_q.data
@@ -0,0 +1 @@
+0.0 3.331899642944336e-05 6.663799285888672e-05 0.00010001659393310547 0.00013327598571777344 0.0001666545867919922 0.00020003318786621094 0.0002332925796508789 0.0002665519714355469 0.0002999305725097656 0.0003333091735839844 0.0003666877746582031 0.0004000663757324219 0.0004334449768066406 0.0004665851593017578 0.0005002021789550781 0.0005331039428710938 0.0005664825439453125 0.0005998611450195312 0.00063323974609375 0.0006666183471679688 0.0006999969482421875 0.0007333755493164062 0.000766754150390625 0.0008001327514648438 0.0008335113525390625 0.0008668899536132812 0.0008997917175292969 0.0009331703186035156 0.0009665489196777344 0.0010004043579101562 0.001033782958984375 0.0010662078857421875 0.0010995864868164062 0.001132965087890625 0.0011663436889648438 0.0011997222900390625 0.0012331008911132812 0.0012664794921875 0.0012998580932617188 0.0013332366943359375 0.0013666152954101562 0.001399993896484375 0.0014333724975585938 0.0014667510986328125 0.0015001296997070312 0.00153350830078125 0.0015668869018554688 0.0016002655029296875 0.0016336441040039062 0.001667022705078125 0.0017004013061523438 0.0017337799072265625 0.001766204833984375 0.0017995834350585938 0.0018329620361328125 0.0018663406372070312 0.00189971923828125 0.0019330978393554688 0.0019664764404296875 0.0020008087158203125 0.002033233642578125 0.00206756591796875 0.0020999908447265625 0.002132415771484375 0.002166748046875 0.0021991729736328125 0.0022335052490234375 0.00226593017578125 0.002300262451171875 0.0023326873779296875 0.0023670196533203125 0.002399444580078125 0.00243377685546875 0.0024662017822265625 0.0025005340576171875 0.002532958984375 0.002567291259765625 0.0025997161865234375 0.0026340484619140625 0.002666473388671875 0.0027008056640625 0.0027332305908203125 0.0027675628662109375 0.00279998779296875 0.0028324127197265625 0.0028667449951171875 0.002899169921875 0.002933502197265625 0.0029659271240234375 0.0030002593994140625 0.003032684326171875 0.0030670166015625 0.0030994415283203125 0.0031337738037109375 0.00316619873046875 0.003200531005859375 0.0032329559326171875 0.0032672882080078125 0.003299713134765625 0.00333404541015625 0.0033664703369140625 0.0034008026123046875 0.0034332275390625 0.003467559814453125
0.0034999847412109375 0.00353240966796875 0.003566741943359375 0.0035991668701171875 0.0036334991455078125 0.003665924072265625 0.00370025634765625 0.0037326812744140625 0.0037670135498046875 0.0037994384765625 0.003833770751953125 0.0038661956787109375 0.0039005279541015625 0.003932952880859375 0.00396728515625 0.004001617431640625 0.004032135009765625 0.00406646728515625 0.004100799560546875 0.0041351318359375 0.0041656494140625 0.004199981689453125 0.00423431396484375 0.00426483154296875 0.004299163818359375 0.00433349609375 0.004367828369140625 0.004398345947265625 0.00443267822265625 0.004467010498046875 0.0045013427734375 0.0045318603515625 0.004566192626953125 0.00460052490234375 0.004634857177734375 0.004665374755859375 0.00469970703125 0.004734039306640625 0.00476837158203125 0.00479888916015625 0.004833221435546875 0.0048675537109375 0.004901885986328125 0.004932403564453125 0.00496673583984375 0.005001068115234375 0.005031585693359375 0.00506591796875 0.005100250244140625 0.00513458251953125 0.00516510009765625 0.005199432373046875 0.0052337646484375 0.005268096923828125 0.005298614501953125 0.00533294677734375 0.005367279052734375 0.005401611328125 0.00543212890625 0.005466461181640625 0.00550079345703125 0.005535125732421875 0.005565643310546875 0.0055999755859375 0.005634307861328125 0.005664825439453125 0.00569915771484375 0.005733489990234375 0.005767822265625 0.00579833984375 0.005832672119140625 0.00586700439453125 0.005901336669921875 0.005931854248046875 0.0059661865234375 0.006000518798828125 0.00603485107421875 0.00606536865234375 0.006099700927734375 0.006134033203125 0.006168365478515625 0.006198883056640625 0.00623321533203125 0.006267547607421875 0.0063018798828125 0.0063323974609375 0.006366729736328125
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_v.data b/runtime/test/test_files/flash_attn_kvcache_inputs_v.data
new file mode 100644
index 000000000..ad01ded9c
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_inputs_v.data
@@ -0,0 +1 @@
+0.0 2.002716064453125e-05 3.999471664428711e-05 6.002187728881836e-05 7.998943328857422e-05 0.00010001659393310547 0.00011998414993286133 0.0001399517059326172 0.00015997886657714844 0.0001800060272216797 0.00020003318786621094 0.0002199411392211914 0.00023996829986572266 0.0002601146697998047 0.0002799034118652344 0.0002999305725097656 0.0003199577331542969 0.0003399848937988281 0.0003600120544433594 0.0003800392150878906 0.0004000663757324219 0.0004200935363769531 0.0004398822784423828 0.00045990943908691406 0.0004799365997314453 0.0005002021789550781 0.0005202293395996094 0.0005397796630859375 0.0005598068237304688 0.000579833984375 0.0005998611450195312 0.0006198883056640625 0.0006399154663085938 0.000659942626953125 0.0006799697875976562 0.0006999969482421875 0.0007200241088867188 0.00074005126953125 0.0007600784301757812 0.0007801055908203125 0.0008001327514648438 0.000820159912109375 0.0008401870727539062 0.0008602142333984375 0.0008797645568847656 0.0008997917175292969 0.0009198188781738281 0.0009398460388183594 0.0009598731994628906 0.000980377197265625 0.0010004043579101562 0.0010204315185546875 0.0010404586791992188 0.0010595321655273438 0.001079559326171875 0.0010995864868164062 0.0011196136474609375 0.0011396408081054688 0.00115966796875 0.0011796951293945312 0.0011997222900390625 0.0012197494506835938 0.001239776611328125 0.0012598037719726562 0.0012798309326171875 0.0012998580932617188 0.00131988525390625 0.0013399124145507812 0.0013599395751953125
0.0013799667358398438 0.001399993896484375 0.0014200210571289062 0.0014400482177734375 0.0014600753784179688 0.0014801025390625 0.0015001296997070312 0.0015201568603515625 0.0015401840209960938 0.001560211181640625 0.0015802383422851562 0.0016002655029296875 0.0016202926635742188 0.00164031982421875 0.0016603469848632812 0.0016803741455078125 0.0017004013061523438 0.001720428466796875 0.0017404556274414062 0.0017595291137695312 0.0017795562744140625 0.0017995834350585938 0.001819610595703125 0.0018396377563476562 0.0018596649169921875 0.0018796920776367188 0.00189971923828125 0.0019197463989257812 0.0019397735595703125 0.00196075439453125 0.001979827880859375 0.0020008087158203125 0.0020198822021484375 0.002040863037109375 0.0020599365234375 0.0020809173583984375 0.0020999908447265625 0.0021190643310546875 0.002140045166015625 0.00215911865234375 0.0021800994873046875 0.0021991729736328125 0.00222015380859375 0.002239227294921875 0.0022602081298828125 0.0022792816162109375 0.002300262451171875 0.0023193359375 0.0023403167724609375 0.0023593902587890625 0.00238037109375 0.002399444580078125 0.0024204254150390625 0.0024394989013671875 0.002460479736328125 0.00247955322265625 0.0025005340576171875 0.0025196075439453125 0.00254058837890625 0.002559661865234375 0.0025806427001953125 0.0025997161865234375 0.002620697021484375 0.0026397705078125 0.0026607513427734375 0.0026798248291015625 0.0027008056640625 0.002719879150390625 0.0027408599853515625 0.0027599334716796875 0.002780914306640625 0.00279998779296875 0.002819061279296875 0.0028400421142578125 0.0028591156005859375 0.002880096435546875 0.002899169921875 0.0029201507568359375 0.0029392242431640625 0.002960205078125 0.002979278564453125 0.0030002593994140625 0.0030193328857421875 0.003040313720703125 0.00305938720703125 0.0030803680419921875 0.0030994415283203125 0.00312042236328125 0.003139495849609375 0.0031604766845703125 0.0031795501708984375 0.003200531005859375 0.0032196044921875 0.0032405853271484375 0.0032596588134765625 0.0032806396484375 0.003299713134765625 0.0033206939697265625 0.0033397674560546875 0.003360748291015625 0.00337982177734375 0.0034008026123046875 0.0034198760986328125 0.00344085693359375 0.003459930419921875 0.0034809112548828125 0.0034999847412109375 0.0035190582275390625 0.0035400390625 0.003559112548828125 0.0035800933837890625 0.0035991668701171875 0.003620147705078125 0.00363922119140625 0.0036602020263671875 0.0036792755126953125 0.00370025634765625 0.003719329833984375 0.0037403106689453125 0.0037593841552734375 0.003780364990234375 0.0037994384765625 0.0038204193115234375
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_inputs_vcache.data b/runtime/test/test_files/flash_attn_kvcache_inputs_vcache.data
new file mode 100644
index 000000000..57238910f
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_inputs_vcache.data
@@ -0,0 +1 @@
+0.0 2.002716064453125e-05 3.999471664428711e-05 6.002187728881836e-05 7.998943328857422e-05 0.00010001659393310547 0.00011998414993286133 0.0001399517059326172 0.00015997886657714844 0.0001800060272216797 0.00020003318786621094 0.0002199411392211914 0.00023996829986572266 0.0002601146697998047 0.0002799034118652344 0.0002999305725097656 0.0003199577331542969 0.0003399848937988281 0.0003600120544433594 0.0003800392150878906 0.0004000663757324219 0.0004200935363769531 0.0004398822784423828 0.00045990943908691406 0.0004799365997314453 0.0005002021789550781 0.0005202293395996094 0.0005397796630859375 0.0005598068237304688
0.000579833984375 0.0005998611450195312 0.0006198883056640625 0.0006399154663085938 0.000659942626953125 0.0006799697875976562 0.0006999969482421875 0.0007200241088867188 0.00074005126953125 0.0007600784301757812 0.0007801055908203125 0.0008001327514648438 0.000820159912109375 0.0008401870727539062 0.0008602142333984375 0.0008797645568847656 0.0008997917175292969 0.0009198188781738281 0.0009398460388183594 0.0009598731994628906 0.000980377197265625 0.0010004043579101562 0.0010204315185546875 0.0010404586791992188 0.0010595321655273438 0.001079559326171875 0.0010995864868164062 0.0011196136474609375 0.0011396408081054688 0.00115966796875 0.0011796951293945312 0.0011997222900390625 0.0012197494506835938 0.001239776611328125 0.0012598037719726562 0.0012798309326171875 0.0012998580932617188 0.00131988525390625 0.0013399124145507812 0.0013599395751953125 0.0013799667358398438 0.001399993896484375 0.0014200210571289062 0.0014400482177734375 0.0014600753784179688 0.0014801025390625 0.0015001296997070312 0.0015201568603515625 0.0015401840209960938 0.001560211181640625 0.0015802383422851562 0.0016002655029296875 0.0016202926635742188 0.00164031982421875 0.0016603469848632812 0.0016803741455078125 0.0017004013061523438 0.001720428466796875 0.0017404556274414062 0.0017595291137695312 0.0017795562744140625 0.0017995834350585938 0.001819610595703125 0.0018396377563476562 0.0018596649169921875 0.0018796920776367188 0.00189971923828125 0.0019197463989257812 0.0019397735595703125 0.00196075439453125 0.001979827880859375 0.0020008087158203125 0.0020198822021484375 0.002040863037109375 0.0020599365234375 0.0020809173583984375 0.0020999908447265625 0.0021190643310546875 0.002140045166015625 0.00215911865234375 0.0021800994873046875 0.0021991729736328125 0.00222015380859375 0.002239227294921875 0.0022602081298828125 0.0022792816162109375 0.002300262451171875 0.0023193359375 0.0023403167724609375 0.0023593902587890625 0.00238037109375 0.002399444580078125 0.0024204254150390625 0.0024394989013671875 0.002460479736328125 0.00247955322265625 0.0025005340576171875 0.0025196075439453125 0.00254058837890625 0.002559661865234375 0.0025806427001953125 0.0025997161865234375 0.002620697021484375 0.0026397705078125 0.0026607513427734375 0.0026798248291015625 0.0027008056640625 0.002719879150390625 0.0027408599853515625 0.0027599334716796875 0.002780914306640625 0.00279998779296875 0.002819061279296875 0.0028400421142578125 0.0028591156005859375 0.002880096435546875 0.002899169921875 0.0029201507568359375 0.0029392242431640625 0.002960205078125 0.002979278564453125 0.0030002593994140625 0.0030193328857421875 0.003040313720703125 0.00305938720703125 0.0030803680419921875 0.0030994415283203125 0.00312042236328125 0.003139495849609375 0.0031604766845703125 0.0031795501708984375 0.003200531005859375 0.0032196044921875 0.0032405853271484375 0.0032596588134765625 0.0032806396484375 0.003299713134765625 0.0033206939697265625 0.0033397674560546875 0.003360748291015625 0.00337982177734375 0.0034008026123046875 0.0034198760986328125 0.00344085693359375 0.003459930419921875 0.0034809112548828125 0.0034999847412109375 0.0035190582275390625 0.0035400390625 0.003559112548828125 0.0035800933837890625 0.0035991668701171875 0.003620147705078125 0.00363922119140625 0.0036602020263671875 0.0036792755126953125 0.00370025634765625 0.003719329833984375 0.0037403106689453125 0.0037593841552734375 0.003780364990234375 0.0037994384765625 0.0038204193115234375 0.0038394927978515625 0.0038604736328125 0.003879547119140625 0.0039005279541015625 
0.0039215087890625 0.003940582275390625 0.00395965576171875 0.003978729248046875 0.004001617431640625 0.00402069091796875 0.004039764404296875 0.004058837890625 0.00408172607421875 0.004100799560546875 0.004119873046875 0.004138946533203125 0.004161834716796875 0.004180908203125 0.004199981689453125 0.00421905517578125 0.004238128662109375 0.004261016845703125 0.00428009033203125 0.004299163818359375 0.0043182373046875 0.00434112548828125 0.004360198974609375 0.0043792724609375 0.004398345947265625 0.004421234130859375 0.0044403076171875 0.004459381103515625 0.00447845458984375 0.0045013427734375 0.004520416259765625 0.00453948974609375 0.004558563232421875 0.004581451416015625 0.00460052490234375 0.004619598388671875 0.004638671875 0.00466156005859375 0.004680633544921875 0.00469970703125 0.004718780517578125 0.004741668701171875 0.0047607421875 0.004779815673828125 0.00479888916015625 0.00482177734375 0.004840850830078125 0.00485992431640625 0.004878997802734375 0.004901885986328125 0.00492095947265625 0.004940032958984375 0.0049591064453125 0.004978179931640625 0.005001068115234375 0.0050201416015625 0.005039215087890625 0.00505828857421875 0.0050811767578125 0.005100250244140625 0.00511932373046875 0.005138397216796875 0.005161285400390625 0.00518035888671875 0.005199432373046875 0.005218505859375 0.00524139404296875 0.005260467529296875 0.005279541015625 0.005298614501953125 0.005321502685546875 0.005340576171875 0.005359649658203125 0.00537872314453125 0.005401611328125 0.005420684814453125 0.00543975830078125 0.005458831787109375 0.005481719970703125 0.00550079345703125 0.005519866943359375 0.0055389404296875 0.00556182861328125 0.005580902099609375 0.0055999755859375 0.005619049072265625 0.00563812255859375 0.0056610107421875 0.005680084228515625 0.00569915771484375 0.005718231201171875 0.005741119384765625 0.00576019287109375 0.005779266357421875 0.00579833984375 0.00582122802734375 0.005840301513671875 0.005859375 0.005878448486328125 0.005901336669921875 0.00592041015625 0.005939483642578125 0.00595855712890625 0.0059814453125 0.006000518798828125 0.00601959228515625 0.006038665771484375 0.006061553955078125 0.00608062744140625 0.006099700927734375 0.0061187744140625 0.00614166259765625 0.006160736083984375 0.0061798095703125 0.006198883056640625 0.006221771240234375 0.0062408447265625 0.006259918212890625 0.00627899169921875 0.0063018798828125 0.006320953369140625 0.00634002685546875 0.006359100341796875 0.006378173828125 0.00640106201171875 0.006420135498046875 0.006439208984375 0.006458282470703125 0.006481170654296875 0.006500244140625 0.006519317626953125 0.00653839111328125 0.006561279296875 0.006580352783203125 0.00659942626953125 0.006618499755859375 0.006641387939453125 0.00666046142578125 0.006679534912109375 0.0066986083984375 0.00672149658203125 0.006740570068359375 0.0067596435546875 0.006778717041015625 0.006801605224609375 0.0068206787109375 0.006839752197265625 0.00685882568359375 0.0068817138671875 0.006900787353515625 0.00691986083984375 0.006938934326171875 0.006961822509765625 0.00698089599609375 0.006999969482421875 0.00701904296875 0.007038116455078125 0.007061004638671875 0.007080078125 0.007099151611328125 0.00711822509765625 0.00714111328125 0.007160186767578125 0.00717926025390625 0.007198333740234375 0.007221221923828125 0.00724029541015625 0.007259368896484375 0.0072784423828125 0.00730133056640625 0.007320404052734375 0.0073394775390625 0.007358551025390625 0.007381439208984375 0.0074005126953125 0.007419586181640625 0.00743865966796875 
0.0074615478515625 0.007480621337890625 0.00749969482421875 0.007518768310546875 0.007541656494140625 0.00756072998046875 0.007579803466796875 0.007598876953125 0.00762176513671875 0.007640838623046875 0.007659912109375 0.007678985595703125 0.007701873779296875 0.007720947265625 0.007740020751953125 0.00775909423828125 0.007778167724609375 0.007801055908203125 0.00782012939453125 0.007843017578125 0.0078582763671875 0.00788116455078125 0.00789642333984375 0.0079193115234375 0.00794219970703125 0.00795745849609375 0.0079803466796875 0.00800323486328125 0.00801849365234375 0.0080413818359375 0.008056640625 0.00807952880859375 0.0081024169921875 0.00811767578125 0.00814056396484375 0.0081634521484375 0.0081787109375 0.00820159912109375 0.00821685791015625 0.00823974609375 0.00826263427734375 0.00827789306640625 0.00830078125 0.00832366943359375 0.00833892822265625 0.00836181640625 0.0083770751953125 0.00839996337890625 0.0084228515625 0.0084381103515625 0.00846099853515625 0.00847625732421875 0.0084991455078125 0.00852203369140625 0.00853729248046875 0.0085601806640625 0.00858306884765625 0.00859832763671875 0.0086212158203125 0.008636474609375 0.00865936279296875 0.0086822509765625 0.008697509765625 0.00872039794921875 0.0087432861328125 0.008758544921875 0.00878143310546875 0.00879669189453125 0.008819580078125 0.00884246826171875 0.00885772705078125 0.008880615234375 0.00890350341796875 0.00891876220703125 0.008941650390625 0.0089569091796875 0.00897979736328125 0.009002685546875 0.0090179443359375 0.00904083251953125 0.009063720703125 0.0090789794921875 0.00910186767578125 0.00911712646484375 0.0091400146484375 0.00916290283203125 0.00917816162109375 0.0092010498046875 0.00921630859375 0.00923919677734375 0.0092620849609375 0.00927734375 0.00930023193359375 0.0093231201171875 0.00933837890625 0.00936126708984375 0.00937652587890625 0.0093994140625 0.00942230224609375 0.00943756103515625 0.00946044921875 0.00948333740234375 0.00949859619140625 0.009521484375 0.0095367431640625 0.00955963134765625 0.00958251953125 0.0095977783203125 0.00962066650390625 0.0096435546875 0.0096588134765625 0.00968170166015625 0.00969696044921875 0.0097198486328125 0.00974273681640625 0.00975799560546875 0.0097808837890625 0.00980377197265625 0.00981903076171875 0.0098419189453125 0.009857177734375 0.00988006591796875 0.0099029541015625 0.009918212890625 0.00994110107421875 0.00995635986328125 0.009979248046875 0.01000213623046875 0.01001739501953125 0.010040283203125 0.01006317138671875 0.01007843017578125 0.010101318359375 0.0101165771484375 0.01013946533203125 0.010162353515625 0.0101776123046875 0.01020050048828125 0.010223388671875 0.0102386474609375 0.01026153564453125 0.01027679443359375 0.0102996826171875 0.01032257080078125 0.01033782958984375 0.0103607177734375 0.01038360595703125 0.01039886474609375 0.0104217529296875 0.01043701171875 0.01045989990234375 0.0104827880859375 0.010498046875 0.01052093505859375 0.01053619384765625 0.01055908203125 0.01058197021484375 0.01059722900390625 0.0106201171875 0.01064300537109375 0.01065826416015625 0.01068115234375 0.0106964111328125 0.01071929931640625 0.0107421875 0.0107574462890625 0.01078033447265625 0.01080322265625 0.0108184814453125 0.01084136962890625 0.01085662841796875 0.0108795166015625 0.01090240478515625 0.01091766357421875 0.0109405517578125 0.01096343994140625 0.01097869873046875 0.0110015869140625 0.011016845703125 0.01103973388671875 0.0110626220703125 0.011077880859375 0.01110076904296875 0.0111236572265625 0.011138916015625 
0.01116180419921875 0.01117706298828125 0.011199951171875 0.01122283935546875 0.01123809814453125 0.011260986328125 0.0112762451171875 0.01129913330078125 0.011322021484375 0.0113372802734375 0.01136016845703125 0.011383056640625 0.0113983154296875 0.01142120361328125 0.01143646240234375 0.0114593505859375 0.01148223876953125 0.01149749755859375 0.0115203857421875 0.01154327392578125 0.01155853271484375 0.0115814208984375 0.0115966796875 0.01161956787109375 0.0116424560546875 0.01165771484375 0.01168060302734375 0.0117034912109375 0.01171875 0.01174163818359375 0.01175689697265625 0.01177978515625 0.01180267333984375 0.01181793212890625 0.0118408203125 0.01186370849609375 0.01187896728515625 0.01190185546875 0.0119171142578125 0.01194000244140625 0.011962890625 0.0119781494140625 0.01200103759765625 0.01201629638671875 0.0120391845703125 0.01206207275390625 0.01207733154296875 0.0121002197265625 0.01212310791015625 0.01213836669921875 0.0121612548828125 0.012176513671875 0.01219940185546875 0.0122222900390625 0.012237548828125 0.01226043701171875 0.0122833251953125 0.012298583984375 0.01232147216796875 0.01233673095703125 0.012359619140625 0.01238250732421875 0.01239776611328125 0.012420654296875 0.01244354248046875 0.01245880126953125 0.012481689453125 0.0124969482421875 0.01251983642578125 0.012542724609375 0.0125579833984375 0.01258087158203125 0.012603759765625 0.0126190185546875 0.01264190673828125 0.01265716552734375 0.0126800537109375 0.01270294189453125 0.01271820068359375 0.0127410888671875 0.01275634765625 0.01277923583984375 0.0128021240234375 0.0128173828125 0.01284027099609375 0.0128631591796875 0.01287841796875 0.01290130615234375 0.01291656494140625 0.012939453125 0.01296234130859375 0.01297760009765625 0.01300048828125 0.01302337646484375 0.01303863525390625 0.0130615234375 0.0130767822265625 0.01309967041015625 0.01312255859375 0.0131378173828125 0.01316070556640625 0.01318359375 0.0131988525390625 0.01322174072265625 0.01323699951171875 0.0132598876953125 0.01328277587890625 0.01329803466796875 0.0133209228515625 0.01334381103515625 0.01335906982421875 0.0133819580078125 0.013397216796875 0.01342010498046875 0.0134429931640625 0.013458251953125 0.01348114013671875 0.01349639892578125 0.013519287109375 0.01354217529296875 0.01355743408203125 0.013580322265625 0.01360321044921875 0.01361846923828125 0.013641357421875 0.0136566162109375 0.01367950439453125 0.013702392578125 0.0137176513671875 0.01374053955078125 0.013763427734375 0.0137786865234375 0.01380157470703125 0.01381683349609375 0.0138397216796875 0.01386260986328125 0.01387786865234375 0.0139007568359375 0.01392364501953125 0.01393890380859375 0.0139617919921875 0.01397705078125 0.01399993896484375 0.0140228271484375 0.0140380859375 0.01406097412109375 0.01407623291015625 0.01409912109375 0.01412200927734375 0.01413726806640625 0.01416015625 0.01418304443359375 0.01419830322265625 0.01422119140625 0.0142364501953125 0.01425933837890625 0.0142822265625 0.0142974853515625 0.01432037353515625 0.01434326171875 0.0143585205078125 0.01438140869140625 0.01439666748046875 0.0144195556640625 0.01444244384765625 0.01445770263671875 0.0144805908203125 0.01450347900390625 0.01451873779296875 0.0145416259765625 0.014556884765625 0.01457977294921875 0.0146026611328125 0.014617919921875 0.01464080810546875 0.0146636962890625 0.014678955078125 0.01470184326171875 0.01471710205078125 0.014739990234375 0.01476287841796875 0.01477813720703125 0.014801025390625 0.0148162841796875 0.01483917236328125 0.014862060546875 
0.0148773193359375 0.01490020751953125 0.014923095703125 0.0149383544921875 0.01496124267578125 0.01497650146484375 0.0149993896484375 0.01502227783203125 0.01503753662109375 0.0150604248046875 0.01508331298828125 0.01509857177734375 0.0151214599609375 0.01513671875 0.01515960693359375 0.0151824951171875 0.01519775390625 0.01522064208984375 0.0152435302734375 0.0152587890625 0.01528167724609375 0.01529693603515625 0.01531982421875 0.01534271240234375 0.01535797119140625 0.015380859375 0.01540374755859375 0.01541900634765625 0.01544189453125 0.0154571533203125 0.01548004150390625 0.0155029296875 0.0155181884765625 0.01554107666015625 0.01555633544921875 0.0155792236328125 0.01560211181640625 0.01561737060546875 0.0156402587890625 0.015655517578125 0.01568603515625 0.0157012939453125 0.015716552734375 0.0157470703125 0.0157623291015625 0.015777587890625 0.0157928466796875 0.0158233642578125 0.015838623046875 0.0158538818359375 0.0158843994140625 0.015899658203125 0.0159149169921875 0.0159454345703125 0.015960693359375 0.0159759521484375 0.0160064697265625 0.016021728515625 0.0160369873046875 0.0160675048828125 0.016082763671875 0.0160980224609375 0.01611328125 0.016143798828125 0.0161590576171875 0.01617431640625 0.016204833984375 0.0162200927734375 0.0162353515625 0.016265869140625 0.0162811279296875 0.01629638671875 0.016326904296875 0.0163421630859375 0.016357421875 0.0163726806640625 0.0164031982421875 0.01641845703125 0.0164337158203125 0.0164642333984375 0.0164794921875 0.0164947509765625 0.0165252685546875 0.01654052734375 0.0165557861328125 0.0165863037109375 0.0166015625 0.0166168212890625 0.0166473388671875 0.01666259765625 0.0166778564453125 0.016693115234375 0.0167236328125 0.0167388916015625 0.016754150390625 0.01678466796875 0.0167999267578125 0.016815185546875 0.016845703125 0.0168609619140625 0.016876220703125 0.01690673828125 0.0169219970703125 0.016937255859375 0.0169525146484375 0.0169830322265625 0.016998291015625 0.0170135498046875 0.0170440673828125 0.017059326171875 0.0170745849609375 0.0171051025390625 0.017120361328125 0.0171356201171875 0.0171661376953125 0.017181396484375 0.0171966552734375 0.0172271728515625 0.017242431640625 0.0172576904296875 0.01727294921875 0.017303466796875 0.0173187255859375 0.017333984375 0.017364501953125 0.0173797607421875 0.01739501953125 0.017425537109375 0.0174407958984375 0.0174560546875 0.017486572265625 0.0175018310546875 0.01751708984375 0.017547607421875 0.0175628662109375 0.017578125 0.0175933837890625 0.0176239013671875 0.01763916015625 0.0176544189453125 0.0176849365234375 0.0177001953125 0.0177154541015625 0.0177459716796875 0.01776123046875 0.0177764892578125 0.0178070068359375 0.017822265625 0.0178375244140625 0.017852783203125 0.01788330078125 0.0178985595703125 0.017913818359375 0.0179443359375 0.0179595947265625 0.017974853515625 0.01800537109375 0.0180206298828125 0.018035888671875 0.01806640625 0.0180816650390625 0.018096923828125 0.01812744140625 0.0181427001953125 0.018157958984375 0.0181732177734375 0.0182037353515625 0.018218994140625 0.0182342529296875 0.0182647705078125 0.018280029296875 0.0182952880859375 0.0183258056640625 0.018341064453125 0.0183563232421875 0.0183868408203125 0.018402099609375 0.0184173583984375 0.0184326171875 0.018463134765625 0.0184783935546875 0.01849365234375 0.018524169921875 0.0185394287109375 0.0185546875 0.018585205078125 0.0186004638671875 0.01861572265625 0.018646240234375 0.0186614990234375 0.0186767578125 0.018707275390625 0.0187225341796875 0.01873779296875 0.0187530517578125 
0.0187835693359375 0.018798828125 0.0188140869140625 0.0188446044921875 0.01885986328125 0.0188751220703125 0.0189056396484375 0.0189208984375 0.0189361572265625 0.0189666748046875 0.01898193359375 0.0189971923828125 0.019012451171875 0.01904296875 0.0190582275390625 0.019073486328125 0.01910400390625 0.0191192626953125 0.019134521484375 0.0191650390625 0.0191802978515625 0.019195556640625 0.01922607421875 0.0192413330078125 0.019256591796875 0.019287109375 0.0193023681640625 0.019317626953125 0.0193328857421875 0.0193634033203125 0.019378662109375 0.0193939208984375 0.0194244384765625 0.019439697265625 0.0194549560546875 0.0194854736328125 0.019500732421875 0.0195159912109375 0.0195465087890625 0.019561767578125 0.0195770263671875 0.0196075439453125 0.019622802734375 0.0196380615234375 0.0196533203125 0.019683837890625 0.0196990966796875 0.01971435546875 0.019744873046875 0.0197601318359375 0.019775390625 0.019805908203125 0.0198211669921875 0.01983642578125 0.019866943359375 0.0198822021484375 0.0198974609375 0.0199127197265625 0.0199432373046875 0.01995849609375 0.0199737548828125 0.0200042724609375 0.02001953125 0.0200347900390625 0.0200653076171875 0.02008056640625 0.0200958251953125 0.0201263427734375 0.0201416015625 0.0201568603515625 0.0201873779296875 0.02020263671875 0.0202178955078125 0.020233154296875 0.020263671875 0.0202789306640625 0.020294189453125 0.02032470703125 0.0203399658203125 0.020355224609375 0.0203857421875 0.0204010009765625 0.020416259765625 0.02044677734375 0.0204620361328125 0.020477294921875 0.0204925537109375 0.0205230712890625 0.020538330078125 0.0205535888671875 0.0205841064453125 0.020599365234375 0.0206146240234375 0.0206451416015625 0.020660400390625 0.0206756591796875 0.0207061767578125 0.020721435546875 0.0207366943359375 0.0207672119140625 0.020782470703125 0.0207977294921875 0.02081298828125 0.020843505859375 0.0208587646484375 0.0208740234375 0.020904541015625 0.0209197998046875 0.02093505859375 0.020965576171875 0.0209808349609375 0.02099609375 0.021026611328125 0.0210418701171875 0.02105712890625 0.0210723876953125 0.0211029052734375 0.0211181640625 0.0211334228515625 0.0211639404296875 0.02117919921875 0.0211944580078125 0.0212249755859375 0.021240234375 0.0212554931640625 0.0212860107421875 0.02130126953125 0.0213165283203125 0.0213470458984375 0.0213623046875 0.0213775634765625 0.021392822265625 0.02142333984375 0.0214385986328125 0.021453857421875 0.021484375 0.0214996337890625 0.021514892578125 0.02154541015625 0.0215606689453125 0.021575927734375 0.0216064453125 0.0216217041015625 0.021636962890625 0.02166748046875 0.0216827392578125 0.021697998046875 0.0217132568359375 0.0217437744140625 0.021759033203125 0.0217742919921875 0.0218048095703125 0.021820068359375 0.0218353271484375 0.0218658447265625 0.021881103515625 0.0218963623046875 0.0219268798828125 0.021942138671875 0.0219573974609375 0.02197265625 0.022003173828125 0.0220184326171875 0.02203369140625 0.022064208984375 0.0220794677734375 0.0220947265625 0.022125244140625 0.0221405029296875 0.02215576171875 0.022186279296875 0.0222015380859375 0.022216796875 0.022247314453125 0.0222625732421875 0.02227783203125 0.0222930908203125 0.0223236083984375 0.0223388671875 0.0223541259765625 0.0223846435546875 0.02239990234375 0.0224151611328125 0.0224456787109375 0.0224609375 0.0224761962890625 0.0225067138671875 0.02252197265625 0.0225372314453125 0.022552490234375 0.0225830078125 0.0225982666015625 0.022613525390625 0.02264404296875 0.0226593017578125 0.022674560546875 0.022705078125 
[flash_attn_kvcache test input data: several thousand whitespace-separated half-precision values, monotonically non-decreasing from 0.0227203369140625 to about 0.1083. The underlying ramp advances in steps of 2^-16; consecutive entries collapse into runs of duplicates wherever the float16 ULP at that magnitude exceeds the step.]
0.10833740234375 0.10833740234375 0.1083984375 0.1083984375 0.1083984375 0.10845947265625 0.10845947265625 0.10845947265625 0.10845947265625 0.10845947265625 0.10858154296875 0.10858154296875 0.10858154296875 0.108642578125 0.108642578125 0.108642578125 0.108642578125 0.108642578125 0.10870361328125 0.10870361328125 0.10870361328125 0.10882568359375 0.10882568359375 0.10882568359375 0.10882568359375 0.10882568359375 0.10888671875 0.10888671875 0.10888671875 0.10894775390625 0.10894775390625 0.10894775390625 0.10894775390625 0.10894775390625 0.10906982421875 0.10906982421875 0.10906982421875 0.109130859375 0.109130859375 0.109130859375 0.109130859375 0.109130859375 0.10919189453125 0.10919189453125 0.10919189453125 0.1092529296875 0.1092529296875 0.1092529296875 0.1092529296875 0.1092529296875 0.109375 0.109375 0.109375 0.10943603515625 0.10943603515625 0.10943603515625 0.10943603515625 0.10943603515625 0.1094970703125 0.1094970703125 0.1094970703125 0.109619140625 0.109619140625 0.109619140625 0.109619140625 0.109619140625 0.10968017578125 0.10968017578125 0.10968017578125 0.1097412109375 0.1097412109375 0.1097412109375 0.1097412109375 0.1097412109375 0.10986328125 0.10986328125 0.10986328125 0.10992431640625 0.10992431640625 0.10992431640625 0.10992431640625 0.10992431640625 0.1099853515625 0.1099853515625 0.1099853515625 0.110107421875 0.110107421875 0.110107421875 0.110107421875 0.110107421875 0.11016845703125 0.11016845703125 0.11016845703125 0.1102294921875 0.1102294921875 0.1102294921875 0.1102294921875 0.1102294921875 0.11029052734375 0.11029052734375 0.11029052734375 0.11041259765625 0.11041259765625 0.11041259765625 0.11041259765625 0.11041259765625 0.1104736328125 0.1104736328125 0.1104736328125 0.11053466796875 0.11053466796875 0.11053466796875 0.11053466796875 0.11053466796875 0.11065673828125 0.11065673828125 0.11065673828125 0.1107177734375 0.1107177734375 0.1107177734375 0.1107177734375 0.1107177734375 0.11077880859375 0.11077880859375 0.11077880859375 0.11090087890625 0.11090087890625 0.11090087890625 0.11090087890625 0.11090087890625 0.1109619140625 0.1109619140625 0.1109619140625 0.11102294921875 0.11102294921875 0.11102294921875 0.11102294921875 0.11102294921875 0.11114501953125 0.11114501953125 0.11114501953125 0.1112060546875 0.1112060546875 0.1112060546875 0.1112060546875 0.1112060546875 0.11126708984375 0.11126708984375 0.11126708984375 0.11138916015625 0.11138916015625 0.11138916015625 0.11138916015625 0.11138916015625 0.1114501953125 0.1114501953125 0.1114501953125 0.11151123046875 0.11151123046875 0.11151123046875 0.11151123046875 0.11151123046875 0.111572265625 0.111572265625 0.111572265625 0.1116943359375 0.1116943359375 0.1116943359375 0.1116943359375 0.1116943359375 0.11175537109375 0.11175537109375 0.11175537109375 0.11181640625 0.11181640625 0.11181640625 0.11181640625 0.11181640625 0.1119384765625 0.1119384765625 0.1119384765625 0.11199951171875 0.11199951171875 0.11199951171875 0.11199951171875 0.11199951171875 0.112060546875 0.112060546875 0.112060546875 0.1121826171875 0.1121826171875 0.1121826171875 0.1121826171875 0.1121826171875 0.11224365234375 0.11224365234375 0.11224365234375 0.1123046875 0.1123046875 0.1123046875 0.1123046875 0.1123046875 0.1124267578125 0.1124267578125 0.1124267578125 0.11248779296875 0.11248779296875 0.11248779296875 0.11248779296875 0.11248779296875 0.112548828125 0.112548828125 0.112548828125 0.11260986328125 0.11260986328125 0.11260986328125 0.11260986328125 0.11260986328125 0.11273193359375 0.11273193359375 0.11273193359375 
0.11279296875 0.11279296875 0.11279296875 0.11279296875 0.11279296875 0.11285400390625 0.11285400390625 0.11285400390625 0.11297607421875 0.11297607421875 0.11297607421875 0.11297607421875 0.11297607421875 0.113037109375 0.113037109375 0.113037109375 0.11309814453125 0.11309814453125 0.11309814453125 0.11309814453125 0.11309814453125 0.11322021484375 0.11322021484375 0.11322021484375 0.11328125 0.11328125 0.11328125 0.11328125 0.11328125 0.11334228515625 0.11334228515625 0.11334228515625 0.11346435546875 0.11346435546875 0.11346435546875 0.11346435546875 0.11346435546875 0.113525390625 0.113525390625 0.113525390625 0.11358642578125 0.11358642578125 0.11358642578125 0.11358642578125 0.11358642578125 0.11370849609375 0.11370849609375 0.11370849609375 0.11376953125 0.11376953125 0.11376953125 0.11376953125 0.11376953125 0.11383056640625 0.11383056640625 0.11383056640625 0.1138916015625 0.1138916015625 0.1138916015625 0.1138916015625 0.1138916015625 0.114013671875 0.114013671875 0.114013671875 0.11407470703125 0.11407470703125 0.11407470703125 0.11407470703125 0.11407470703125 0.1141357421875 0.1141357421875 0.1141357421875 0.1142578125 0.1142578125 0.1142578125 0.1142578125 0.1142578125 0.11431884765625 0.11431884765625 0.11431884765625 0.1143798828125 0.1143798828125 0.1143798828125 0.1143798828125 0.1143798828125 0.114501953125 0.114501953125 0.114501953125 0.11456298828125 0.11456298828125 0.11456298828125 0.11456298828125 0.11456298828125 0.1146240234375 0.1146240234375 0.1146240234375 0.11474609375 0.11474609375 0.11474609375 0.11474609375 0.11474609375 0.11480712890625 0.11480712890625 0.11480712890625 0.1148681640625 0.1148681640625 0.1148681640625 0.1148681640625 0.1148681640625 0.114990234375 0.114990234375 0.114990234375 0.11505126953125 0.11505126953125 0.11505126953125 0.11505126953125 0.11505126953125 0.1151123046875 0.1151123046875 0.1151123046875 0.11517333984375 0.11517333984375 0.11517333984375 0.11517333984375 0.11517333984375 0.11529541015625 0.11529541015625 0.11529541015625 0.1153564453125 0.1153564453125 0.1153564453125 0.1153564453125 0.1153564453125 0.11541748046875 0.11541748046875 0.11541748046875 0.11553955078125 0.11553955078125 0.11553955078125 0.11553955078125 0.11553955078125 0.1156005859375 0.1156005859375 0.1156005859375 0.11566162109375 0.11566162109375 0.11566162109375 0.11566162109375 0.11566162109375 0.11578369140625 0.11578369140625 0.11578369140625 0.1158447265625 0.1158447265625 0.1158447265625 0.1158447265625 0.1158447265625 0.11590576171875 0.11590576171875 0.11590576171875 0.11602783203125 0.11602783203125 0.11602783203125 0.11602783203125 0.11602783203125 0.1160888671875 0.1160888671875 0.1160888671875 0.11614990234375 0.11614990234375 0.11614990234375 0.11614990234375 0.11614990234375 0.1162109375 0.1162109375 0.1162109375 0.1163330078125 0.1163330078125 0.1163330078125 0.1163330078125 0.1163330078125 0.11639404296875 0.11639404296875 0.11639404296875 0.116455078125 0.116455078125 0.116455078125 0.116455078125 0.116455078125 0.1165771484375 0.1165771484375 0.1165771484375 0.11663818359375 0.11663818359375 0.11663818359375 0.11663818359375 0.11663818359375 0.11669921875 0.11669921875 0.11669921875 0.1168212890625 0.1168212890625 0.1168212890625 0.1168212890625 0.1168212890625 0.11688232421875 0.11688232421875 0.11688232421875 0.116943359375 0.116943359375 0.116943359375 0.116943359375 0.116943359375 0.1170654296875 0.1170654296875 0.1170654296875 0.11712646484375 0.11712646484375 0.11712646484375 0.11712646484375 0.11712646484375 0.1171875 0.1171875 
0.1171875 0.1173095703125 0.1173095703125 0.1173095703125 0.1173095703125 0.1173095703125 0.11737060546875 0.11737060546875 0.11737060546875 0.117431640625 0.117431640625 0.117431640625 0.117431640625 0.117431640625 0.11749267578125 0.11749267578125 0.11749267578125 0.11761474609375 0.11761474609375 0.11761474609375 0.11761474609375 0.11761474609375 0.11767578125 0.11767578125 0.11767578125 0.11773681640625 0.11773681640625 0.11773681640625 0.11773681640625 0.11773681640625 0.11785888671875 0.11785888671875 0.11785888671875 0.117919921875 0.117919921875 0.117919921875 0.117919921875 0.117919921875 0.11798095703125 0.11798095703125 0.11798095703125 0.11810302734375 0.11810302734375 0.11810302734375 0.11810302734375 0.11810302734375 0.1181640625 0.1181640625 0.1181640625 0.11822509765625 0.11822509765625 0.11822509765625 0.11822509765625 0.11822509765625 0.11834716796875 0.11834716796875 0.11834716796875 0.118408203125 0.118408203125 0.118408203125 0.118408203125 0.118408203125 0.11846923828125 0.11846923828125 0.11846923828125 0.1185302734375 0.1185302734375 0.1185302734375 0.1185302734375 0.1185302734375 0.11865234375 0.11865234375 0.11865234375 0.11871337890625 0.11871337890625 0.11871337890625 0.11871337890625 0.11871337890625 0.1187744140625 0.1187744140625 0.1187744140625 0.118896484375 0.118896484375 0.118896484375 0.118896484375 0.118896484375 0.11895751953125 0.11895751953125 0.11895751953125 0.1190185546875 0.1190185546875 0.1190185546875 0.1190185546875 0.1190185546875 0.119140625 0.119140625 0.119140625 0.11920166015625 0.11920166015625 0.11920166015625 0.11920166015625 0.11920166015625 0.1192626953125 0.1192626953125 0.1192626953125 0.119384765625 0.119384765625 0.119384765625 0.119384765625 0.119384765625 0.11944580078125 0.11944580078125 0.11944580078125 0.1195068359375 0.1195068359375 0.1195068359375 0.1195068359375 0.1195068359375 0.11962890625 0.11962890625 0.11962890625 0.11968994140625 0.11968994140625 0.11968994140625 0.11968994140625 0.11968994140625 0.1197509765625 0.1197509765625 0.1197509765625 0.11981201171875 0.11981201171875 0.11981201171875 0.11981201171875 0.11981201171875 0.11993408203125 0.11993408203125 0.11993408203125 0.1199951171875 0.1199951171875 0.1199951171875 0.1199951171875 0.1199951171875 0.12005615234375 0.12005615234375 0.12005615234375 0.12017822265625 0.12017822265625 0.12017822265625 0.12017822265625 0.12017822265625 0.1202392578125 0.1202392578125 0.1202392578125 0.12030029296875 0.12030029296875 0.12030029296875 0.12030029296875 0.12030029296875 0.12042236328125 0.12042236328125 0.12042236328125 0.1204833984375 0.1204833984375 0.1204833984375 0.1204833984375 0.1204833984375 0.12054443359375 0.12054443359375 0.12054443359375 0.12066650390625 0.12066650390625 0.12066650390625 0.12066650390625 0.12066650390625 0.1207275390625 0.1207275390625 0.1207275390625 0.12078857421875 0.12078857421875 0.12078857421875 0.12078857421875 0.12078857421875 0.120849609375 0.120849609375 0.120849609375 0.1209716796875 0.1209716796875 0.1209716796875 0.1209716796875 0.1209716796875 0.12103271484375 0.12103271484375 0.12103271484375 0.12109375 0.12109375 0.12109375 0.12109375 0.12109375 0.1212158203125 0.1212158203125 0.1212158203125 0.12127685546875 0.12127685546875 0.12127685546875 0.12127685546875 0.12127685546875 0.121337890625 0.121337890625 0.121337890625 0.1214599609375 0.1214599609375 0.1214599609375 0.1214599609375 0.1214599609375 0.12152099609375 0.12152099609375 0.12152099609375 0.12158203125 0.12158203125 0.12158203125 0.12158203125 0.12158203125 
0.1217041015625 0.1217041015625 0.1217041015625 0.12176513671875 0.12176513671875 0.12176513671875 0.12176513671875 0.12176513671875 0.121826171875 0.121826171875 0.121826171875 0.1219482421875 0.1219482421875 0.1219482421875 0.1219482421875 0.1219482421875 0.12200927734375 0.12200927734375 0.12200927734375 0.1220703125 0.1220703125 0.1220703125 0.1220703125 0.1220703125 0.12213134765625 0.12213134765625 0.12213134765625 0.12225341796875 0.12225341796875 0.12225341796875 0.12225341796875 0.12225341796875 0.122314453125 0.122314453125 0.122314453125 0.12237548828125 0.12237548828125 0.12237548828125 0.12237548828125 0.12237548828125 0.12249755859375 0.12249755859375 0.12249755859375 0.12255859375 0.12255859375 0.12255859375 0.12255859375 0.12255859375 0.12261962890625 0.12261962890625 0.12261962890625 0.12274169921875 0.12274169921875 0.12274169921875 0.12274169921875 0.12274169921875 0.122802734375 0.122802734375 0.122802734375 0.12286376953125 0.12286376953125 0.12286376953125 0.12286376953125 0.12286376953125 0.12298583984375 0.12298583984375 0.12298583984375 0.123046875 0.123046875 0.123046875 0.123046875 0.123046875 0.12310791015625 0.12310791015625 0.12310791015625 0.12322998046875 0.12322998046875 0.12322998046875 0.12322998046875 0.12322998046875 0.123291015625 0.123291015625 0.123291015625 0.12335205078125 0.12335205078125 0.12335205078125 0.12335205078125 0.12335205078125 0.1234130859375 0.1234130859375 0.1234130859375 0.12353515625 0.12353515625 0.12353515625 0.12353515625 0.12353515625 0.12359619140625 0.12359619140625 0.12359619140625 0.1236572265625 0.1236572265625 0.1236572265625 0.1236572265625 0.1236572265625 0.123779296875 0.123779296875 0.123779296875 0.12384033203125 0.12384033203125 0.12384033203125 0.12384033203125 0.12384033203125 0.1239013671875 0.1239013671875 0.1239013671875 0.1240234375 0.1240234375 0.1240234375 0.1240234375 0.1240234375 0.12408447265625 0.12408447265625 0.12408447265625 0.1241455078125 0.1241455078125 0.1241455078125 0.1241455078125 0.1241455078125 0.124267578125 0.124267578125 0.124267578125 0.12432861328125 0.12432861328125 0.12432861328125 0.12432861328125 0.12432861328125 0.1243896484375 0.1243896484375 0.1243896484375 0.12445068359375 0.12445068359375 0.12445068359375 0.12445068359375 0.12445068359375 0.12457275390625 0.12457275390625 0.12457275390625 0.1246337890625 0.1246337890625 0.1246337890625 0.1246337890625 0.1246337890625 0.12469482421875 0.12469482421875 0.12469482421875 0.12481689453125 0.12481689453125 0.12481689453125 0.12481689453125 0.12481689453125 0.1248779296875 0.1248779296875 0.1248779296875 0.12493896484375 0.12493896484375 0.12493896484375 0.12493896484375 0.12493896484375 0.125 0.125 0.125 0.1251220703125 0.1251220703125 0.1251220703125 0.1251220703125 0.1251220703125 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.1253662109375 0.1253662109375 0.1253662109375 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.1256103515625 0.1256103515625 0.1256103515625 0.1256103515625 0.1256103515625 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.1258544921875 0.1258544921875 0.1258544921875 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1260986328125 0.1260986328125 0.1260986328125 0.1260986328125 0.1260986328125 0.126220703125 0.126220703125 0.126220703125 
0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.12646484375 0.12646484375 0.12646484375 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.126708984375 0.126708984375 0.126708984375 0.126708984375 0.126708984375 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.126953125 0.126953125 0.126953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.12744140625 0.12744140625 0.12744140625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.127685546875 0.127685546875 0.127685546875 0.127685546875 0.127685546875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1279296875 0.1279296875 0.1279296875 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.12841796875 0.12841796875 0.12841796875 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.1290283203125 0.1290283203125 0.1290283203125 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.1292724609375 0.1292724609375 0.1292724609375 0.1292724609375 0.1292724609375 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.1295166015625 0.1295166015625 0.1295166015625 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1300048828125 0.1300048828125 0.1300048828125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.1304931640625 0.1304931640625 0.1304931640625 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.1307373046875 0.1307373046875 0.1307373046875 0.1307373046875 0.1307373046875 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 
0.130859375 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.131103515625 0.131103515625 0.131103515625 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.131591796875 0.131591796875 0.131591796875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.132080078125 0.132080078125 0.132080078125 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.13232421875 0.13232421875 0.13232421875 0.13232421875 0.13232421875 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.132568359375 0.132568359375 0.132568359375 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1328125 0.1328125 0.1328125 0.1328125 0.1328125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.133056640625 0.133056640625 0.133056640625 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.1336669921875 0.1336669921875 0.1336669921875 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1339111328125 0.1339111328125 0.1339111328125 0.1339111328125 0.1339111328125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.1341552734375 0.1341552734375 0.1341552734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.1346435546875 0.1346435546875 0.1346435546875 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.1348876953125 0.1348876953125 0.1348876953125 0.1348876953125 0.1348876953125 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.1351318359375 0.1351318359375 0.1351318359375 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.1356201171875 0.1356201171875 0.1356201171875 
0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1357421875 0.1357421875 0.1357421875 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.13623046875 0.13623046875 0.13623046875 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.136474609375 0.136474609375 0.136474609375 0.136474609375 0.136474609375 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.13671875 0.13671875 0.13671875 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.13720703125 0.13720703125 0.13720703125 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1376953125 0.1376953125 0.1376953125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.1380615234375 0.1380615234375 0.1380615234375 0.1380615234375 0.1380615234375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.1383056640625 0.1383056640625 0.1383056640625 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.1387939453125 0.1387939453125 0.1387939453125 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.1392822265625 0.1392822265625 0.1392822265625 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.1395263671875 0.1395263671875 0.1395263671875 0.1395263671875 0.1395263671875 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1397705078125 0.1397705078125 0.1397705078125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.1402587890625 0.1402587890625 0.1402587890625 0.140380859375 0.140380859375 0.140380859375 
0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.140625 0.140625 0.140625 0.140625 0.140625 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.140869140625 0.140869140625 0.140869140625 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.14111328125 0.14111328125 0.14111328125 0.14111328125 0.14111328125 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.141357421875 0.141357421875 0.141357421875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.141845703125 0.141845703125 0.141845703125 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.14208984375 0.14208984375 0.14208984375 0.14208984375 0.14208984375 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.142333984375 0.142333984375 0.142333984375 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.142578125 0.142578125 0.142578125 0.142578125 0.142578125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.1429443359375 0.1429443359375 0.1429443359375 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.1434326171875 0.1434326171875 0.1434326171875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1436767578125 0.1436767578125 0.1436767578125 0.1436767578125 0.1436767578125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.1439208984375 0.1439208984375 0.1439208984375 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.1444091796875 0.1444091796875 0.1444091796875 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.1448974609375 0.1448974609375 0.1448974609375 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.1451416015625 
0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.145263671875 0.145263671875 0.145263671875 0.145263671875 0.145263671875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1455078125 0.1455078125 0.1455078125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.14599609375 0.14599609375 0.14599609375 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.146484375 0.146484375 0.146484375 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.146728515625 0.146728515625 0.146728515625 0.146728515625 0.146728515625 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.14697265625 0.14697265625 0.14697265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1475830078125 0.1475830078125 0.1475830078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.1480712890625 0.1480712890625 0.1480712890625 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.1483154296875 0.1483154296875 0.1483154296875 0.1483154296875 0.1483154296875 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1485595703125 0.1485595703125 0.1485595703125 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.1490478515625 0.1490478515625 0.1490478515625 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1495361328125 0.1495361328125 0.1495361328125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 
0.1497802734375 0.1497802734375 0.1497802734375 0.14990234375 0.14990234375 0.14990234375 0.14990234375 0.14990234375 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.150146484375 0.150146484375 0.150146484375 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.150390625 0.150390625 0.150390625 0.150390625 0.150390625 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.150634765625 0.150634765625 0.150634765625 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.15087890625 0.15087890625 0.15087890625 0.15087890625 0.15087890625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.151123046875 0.151123046875 0.151123046875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.151611328125 0.151611328125 0.151611328125 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.1522216796875 0.1522216796875 0.1522216796875 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.1524658203125 0.1524658203125 0.1524658203125 0.1524658203125 0.1524658203125 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.1527099609375 0.1527099609375 0.1527099609375 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.1531982421875 0.1531982421875 0.1531982421875 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.1536865234375 0.1536865234375 0.1536865234375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.1539306640625 0.1539306640625 0.1539306640625 0.1539306640625 0.1539306640625 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.1541748046875 0.1541748046875 0.1541748046875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.1544189453125 0.1544189453125 0.1544189453125 0.1544189453125 0.1544189453125 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.154541015625 
0.154541015625 0.154541015625 0.154541015625 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.15478515625 0.15478515625 0.15478515625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.155029296875 0.155029296875 0.155029296875 0.155029296875 0.155029296875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1552734375 0.1552734375 0.1552734375 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.155517578125 0.155517578125 0.155517578125 0.155517578125 0.155517578125 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.15576171875 0.15576171875 0.15576171875 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.15625 0.15625 0.15625 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.156494140625 0.156494140625 0.156494140625 0.156494140625 0.156494140625 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.15673828125 0.15673828125 0.15673828125 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.1571044921875 0.1571044921875 0.1571044921875 0.1571044921875 0.1571044921875 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1573486328125 0.1573486328125 0.1573486328125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.1578369140625 0.1578369140625 0.1578369140625 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.1580810546875 0.1580810546875 0.1580810546875 0.1580810546875 0.1580810546875 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.1583251953125 0.1583251953125 0.1583251953125 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.1588134765625 0.1588134765625 0.1588134765625 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1593017578125 
0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.159423828125 0.159423828125 0.159423828125 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.15966796875 0.15966796875 0.15966796875 0.15966796875 0.15966796875 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.159912109375 0.159912109375 0.159912109375 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.160400390625 0.160400390625 0.160400390625 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.160888671875 0.160888671875 0.160888671875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1611328125 0.1611328125 0.1611328125 0.1611328125 0.1611328125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.161376953125 0.161376953125 0.161376953125 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.1619873046875 0.1619873046875 0.1619873046875 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.1624755859375 0.1624755859375 0.1624755859375 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.1627197265625 0.1627197265625 0.1627197265625 0.1627197265625 0.1627197265625 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.1629638671875 0.1629638671875 0.1629638671875 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.1634521484375 0.1634521484375 0.1634521484375 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.1636962890625 0.1636962890625 0.1636962890625 0.1636962890625 0.1636962890625 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.1639404296875 0.1639404296875 0.1639404296875 
0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1646728515625 0.1646728515625 0.1646728515625 0.1646728515625 0.1646728515625 0.1646728515625 0.1646728515625 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.165283203125 0.165283203125 0.165283203125 0.165283203125 0.165283203125 0.165283203125 0.165283203125 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.1658935546875 0.1658935546875 0.1658935546875 0.1658935546875 0.1658935546875 0.1658935546875 0.1658935546875 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.1661376953125 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.16650390625 0.16650390625 0.16650390625 0.16650390625 0.16650390625 0.16650390625 0.16650390625 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.1668701171875 0.1668701171875 0.1668701171875 0.1668701171875 0.1668701171875 0.1668701171875 0.1668701171875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.16748046875 0.16748046875 0.16748046875 0.16748046875 0.16748046875 0.16748046875 0.16748046875 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.167724609375 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.1683349609375 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 
0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1690673828125 0.1690673828125 0.1690673828125 0.1690673828125 0.1690673828125 0.1690673828125 0.1690673828125 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.1693115234375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.169921875 0.169921875 0.169921875 0.169921875 0.169921875 0.169921875 0.169921875 0.169921875 0.169921875 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.1702880859375 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1705322265625 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1708984375 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.1715087890625 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171875 0.171875 0.171875 0.171875 0.171875 0.171875 0.171875 0.171875 0.171875 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.172119140625 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.1724853515625 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.173095703125 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.1734619140625 0.1734619140625 0.1734619140625 0.1734619140625 0.1734619140625 
0.1734619140625 0.1734619140625 0.1734619140625 0.1734619140625 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1737060546875 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.174072265625 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1746826171875 0.1749267578125 0.1749267578125 0.1749267578125 0.1749267578125 0.1749267578125 0.1749267578125 0.1749267578125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.175537109375 0.175537109375 0.175537109375 0.175537109375 0.175537109375 0.175537109375 0.175537109375 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.17578125 0.17578125 0.17578125 0.17578125 0.17578125 0.17578125 0.17578125 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.17626953125 0.176513671875 0.176513671875 0.176513671875 0.176513671875 0.176513671875 0.176513671875 0.176513671875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.1771240234375 0.1771240234375 0.1771240234375 0.1771240234375 0.1771240234375 0.1771240234375 0.1771240234375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.177490234375 0.177490234375 0.177490234375 0.177490234375 0.177490234375 0.177490234375 0.177490234375 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.177734375 0.177734375 0.177734375 0.177734375 0.177734375 0.177734375 0.177734375 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.1781005859375 0.1781005859375 0.1781005859375 0.1781005859375 0.1781005859375 0.1781005859375 
[test data elided: single-line tensor dump from a flash_attn test data file; fp16-quantized float values in short repeated runs, non-decreasing from ~0.178 to ~0.274]
0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 
0.279296875 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 
0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 
0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.294677734375 0.294677734375 
0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 
0.299560546875 0.299560546875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 
0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 
0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.31494140625 0.31494140625 0.31494140625 
0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.320068359375 
0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.324951171875 0.324951171875 0.324951171875 0.324951171875 
0.324951171875 0.324951171875 0.324951171875 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 
0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.3349609375 
0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.340087890625 0.340087890625 0.340087890625 
0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.34521484375 0.34521484375 0.34521484375 0.34521484375 
0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 
0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 
0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 
0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 
0.365478515625 0.365478515625 0.365478515625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 
0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 
0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.380859375 0.380859375 0.380859375 0.380859375 
0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385986328125 0.385986328125 
0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 
0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 
0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 
0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.406005859375 0.406005859375 0.406005859375 
0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41015625 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.41064453125 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 
0.410888671875 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.411865234375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.41259765625 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413330078125 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.413818359375 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.4150390625 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.415771484375 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 
0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416259765625 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.4169921875 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.418212890625 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.4189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.41943359375 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.420166015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 
0.421142578125 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.42138671875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.421875 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.422607421875 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.42333984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.424560546875 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.425048828125 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 
0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 
0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.4365234375 0.4365234375 0.4365234375 0.4365234375 
0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 
0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 
0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 
0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 
0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 
0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 
0.466552734375 0.466552734375 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.471923828125 0.471923828125 
0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.47705078125 0.47705078125 0.47705078125 
0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 
0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 
0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_outputs.data b/runtime/test/test_files/flash_attn_kvcache_outputs.data
new file mode 100644
index 000000000..a99f1bb61
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_outputs.data
@@ -0,0 +1 @@
+0.0595703125 0.0595703125 0.059600830078125 0.05963134765625 0.05963134765625 0.059661865234375 0.0596923828125 0.059722900390625 0.059722900390625 0.059722900390625 0.05975341796875
0.059783935546875 0.059814453125 0.059814453125 0.059844970703125 0.05987548828125 0.05987548828125 0.059906005859375 0.059906005859375 0.059967041015625 0.059967041015625 0.059967041015625 0.060028076171875 0.060028076171875 0.06005859375 0.06005859375 0.06005859375 0.06011962890625 0.06011962890625 0.06011962890625 0.0601806640625 0.060211181640625 0.06024169921875 0.06024169921875 0.060272216796875 0.060302734375 0.060302734375 0.060333251953125 0.06036376953125 0.060394287109375 0.060394287109375 0.060394287109375 0.0604248046875 0.060455322265625 0.06048583984375 0.06048583984375 0.060516357421875 0.060546875 0.060546875 0.060546875 0.060577392578125 0.060638427734375 0.060638427734375 0.060638427734375 0.060699462890625 0.060699462890625 0.060699462890625 0.06072998046875 0.06072998046875 0.060791015625 0.060791015625 0.060791015625 0.06085205078125 0.06085205078125 0.0609130859375 0.0609130859375 0.0609130859375 0.06097412109375 0.06097412109375 0.06097412109375 0.06103515625 0.061065673828125 0.061065673828125 0.061065673828125 0.06109619140625 0.061126708984375 0.061126708984375 0.0611572265625 0.061187744140625 0.06121826171875 0.06121826171875 0.06121826171875 0.061248779296875 0.061279296875 0.061309814453125 0.061309814453125 0.06134033203125 0.061370849609375 0.061370849609375 0.061370849609375 0.0614013671875 0.06146240234375 0.06146240234375 0.06146240234375 0.0615234375 0.0615234375 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_outputs_kcache.data b/runtime/test/test_files/flash_attn_kvcache_outputs_kcache.data
new file mode 100644
index 000000000..21a29b4b6
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_outputs_kcache.data
@@ -0,0 +1 @@
+0.0 2.4974346160888672e-05 5.0008296966552734e-05 7.49826431274414e-05 0.00010001659393310547 0.00012505054473876953 0.0001499652862548828 0.00017499923706054688 0.00020003318786621094 0.00022494792938232422 0.00025010108947753906 0.00027489662170410156 0.0002999305725097656 0.0003249645233154297 0.00034999847412109375 0.0003750324249267578 0.0004000663757324219 0.00042510032653808594 0.00044989585876464844 0.0004749298095703125 0.0005002021789550781
0.0005249977111816406 0.0005497932434082031 0.0005750656127929688 0.0005998611450195312 0.0006251335144042969 0.0006499290466308594 0.000675201416015625 0.0006999969482421875 0.00072479248046875 0.0007500648498535156 0.0007748603820800781 0.0008001327514648438 0.0008249282836914062 0.0008502006530761719 0.0008749961853027344 0.0008997917175292969 0.0009250640869140625 0.000949859619140625 0.0009751319885253906 0.0010004043579101562 0.0010251998901367188 0.0010499954223632812 0.0010747909545898438 0.0010995864868164062 0.001125335693359375 0.0011501312255859375 0.0011749267578125 0.0011997222900390625 0.0012254714965820312 0.0012502670288085938 0.0012750625610351562 0.0012998580932617188 0.0013246536254882812 0.00135040283203125 0.0013751983642578125 0.001399993896484375 0.0014247894287109375 0.0014495849609375 0.0014753341674804688 0.0015001296997070312 0.0015249252319335938 0.0015497207641601562 0.001575469970703125 0.0016002655029296875 0.00162506103515625 0.0016498565673828125 0.001674652099609375 0.0017004013061523438 0.0017251968383789062 0.0017499923706054688 0.0017747879028320312 0.0017995834350585938 0.0018253326416015625 0.001850128173828125 0.0018749237060546875 0.00189971923828125 0.0019254684448242188 0.0019502639770507812 0.0019741058349609375 0.0020008087158203125 0.002025604248046875 0.0020503997802734375 0.0020751953125 0.0020999908447265625 0.002124786376953125 0.0021495819091796875 0.00217437744140625 0.0021991729736328125 0.0022258758544921875 0.00225067138671875 0.0022754669189453125 0.002300262451171875 0.0023250579833984375 0.002349853515625 0.0023746490478515625 0.002399444580078125 0.0024242401123046875 0.0024509429931640625 0.002475738525390625 0.0025005340576171875 0.00252532958984375 0.0025501251220703125 0.002574920654296875 0.0025997161865234375 0.00262451171875 0.0026493072509765625 0.002674102783203125 0.0027008056640625 0.0027256011962890625 0.002750396728515625 0.0027751922607421875 0.00279998779296875 0.0028247833251953125 0.002849578857421875 0.0028743743896484375 0.002899169921875 0.002925872802734375 0.0029506683349609375 0.0029754638671875 0.0030002593994140625 0.003025054931640625 0.0030498504638671875 0.00307464599609375 0.0030994415283203125 0.003124237060546875 0.00315093994140625 0.0031757354736328125 0.003200531005859375 0.0032253265380859375 0.0032501220703125 0.0032749176025390625 0.003299713134765625 0.0033245086669921875 0.00334930419921875 0.0033740997314453125 0.0034008026123046875 0.00342559814453125 0.0034503936767578125 0.003475189208984375 0.0034999847412109375 0.0035247802734375 0.0035495758056640625 0.003574371337890625 0.0035991668701171875 0.0036258697509765625 0.003650665283203125 0.0036754608154296875 0.00370025634765625 0.0037250518798828125 0.003749847412109375 0.0037746429443359375 0.0037994384765625 0.0038242340087890625 0.0038509368896484375 0.003875732421875 0.0039005279541015625 0.003925323486328125 0.003948211669921875 0.00397491455078125 0.004001617431640625 0.004024505615234375 0.00405120849609375 0.0040740966796875 0.004100799560546875 0.004123687744140625 0.004150390625 0.00417327880859375 0.004199981689453125 0.0042266845703125 0.00424957275390625 0.004276275634765625 0.004299163818359375 0.00432586669921875 0.0043487548828125 0.004375457763671875 0.004398345947265625 0.004425048828125 0.004451751708984375 0.004474639892578125 0.0045013427734375 0.00452423095703125 0.004550933837890625 0.004573822021484375 0.00460052490234375 0.0046234130859375 0.004650115966796875 0.00467681884765625 0.00469970703125 
0.004726409912109375 0.004749298095703125 0.0047760009765625 0.00479888916015625 0.004825592041015625 0.004848480224609375 0.00487518310546875 0.004901885986328125 0.004924774169921875 0.00495147705078125 0.004974365234375 0.005001068115234375 0.005023956298828125 0.0050506591796875 0.00507354736328125 0.005100250244140625 0.005123138427734375 0.00514984130859375 0.005176544189453125 0.005199432373046875 0.00522613525390625 0.0052490234375 0.005275726318359375 0.005298614501953125 0.0053253173828125 0.00534820556640625 0.005374908447265625 0.005401611328125 0.00542449951171875 0.005451202392578125 0.005474090576171875 0.00550079345703125 0.005523681640625 0.005550384521484375 0.005573272705078125 0.0055999755859375 0.005626678466796875 0.005649566650390625 0.00567626953125 0.00569915771484375 0.005725860595703125 0.005748748779296875 0.00577545166015625 0.00579833984375 0.005825042724609375 0.00585174560546875 0.0058746337890625 0.005901336669921875 0.005924224853515625 0.005950927734375 0.00597381591796875 0.006000518798828125 0.006023406982421875 0.00605010986328125 0.006076812744140625 0.006099700927734375 0.00612640380859375 0.0061492919921875 0.006175994873046875 0.006198883056640625 0.0062255859375 0.00624847412109375 0.006275177001953125 0.0063018798828125 0.00632476806640625 0.006351470947265625 0.006374359130859375 0.00640106201171875 0.0064239501953125 0.006450653076171875 0.006473541259765625 0.006500244140625 0.00652313232421875 0.006549835205078125 0.0065765380859375 0.00659942626953125 0.006626129150390625 0.006649017333984375 0.00667572021484375 0.0066986083984375 0.006725311279296875 0.006748199462890625 0.00677490234375 0.006801605224609375 0.006824493408203125 0.0068511962890625 0.00687408447265625 0.006900787353515625 0.006923675537109375 0.00695037841796875 0.0069732666015625 0.006999969482421875 0.00702667236328125 0.007049560546875 0.007076263427734375 0.007099151611328125 0.0071258544921875 0.00714874267578125 0.007175445556640625 0.007198333740234375 0.00722503662109375 0.007251739501953125 0.007274627685546875 0.00730133056640625 0.00732421875 0.007350921630859375 0.007373809814453125 0.0074005126953125 0.00742340087890625 0.007450103759765625 0.007476806640625 0.00749969482421875 0.007526397705078125 0.007549285888671875 0.00757598876953125 0.007598876953125 0.007625579833984375 0.007648468017578125 0.0076751708984375 0.007701873779296875 0.007724761962890625 0.00775146484375 0.00777435302734375 0.007801055908203125 0.0078277587890625 0.00785064697265625 0.00787353515625 0.00789642333984375 0.00792694091796875 0.0079498291015625 0.00797271728515625 0.00800323486328125 0.008026123046875 0.00804901123046875 0.0080718994140625 0.0081024169921875 0.00812530517578125 0.008148193359375 0.0081787109375 0.00820159912109375 0.0082244873046875 0.00824737548828125 0.00827789306640625 0.00830078125 0.00832366943359375 0.0083465576171875 0.0083770751953125 0.00839996337890625 0.0084228515625 0.008453369140625 0.00847625732421875 0.0084991455078125 0.00852203369140625 0.00855255126953125 0.008575439453125 0.00859832763671875 0.0086212158203125 0.0086517333984375 0.00867462158203125 0.008697509765625 0.00872802734375 0.00875091552734375 0.0087738037109375 0.00879669189453125 0.00882720947265625 0.00885009765625 0.00887298583984375 0.00890350341796875 0.0089263916015625 0.00894927978515625 0.00897216796875 0.009002685546875 0.00902557373046875 0.0090484619140625 0.00907135009765625 0.00910186767578125 0.009124755859375 0.00914764404296875 0.00917816162109375 0.0092010498046875 
0.00922393798828125 0.009246826171875 0.00927734375 0.00930023193359375 0.0093231201171875 0.0093536376953125 0.00937652587890625 0.0093994140625 0.00942230224609375 0.00945281982421875 0.0094757080078125 0.00949859619140625 0.009521484375 0.009552001953125 0.00957489013671875 0.0095977783203125 0.0096282958984375 0.00965118408203125 0.009674072265625 0.00969696044921875 0.00972747802734375 0.0097503662109375 0.00977325439453125 0.00980377197265625 0.00982666015625 0.00984954833984375 0.0098724365234375 0.0099029541015625 0.00992584228515625 0.00994873046875 0.00997161865234375 0.01000213623046875 0.0100250244140625 0.01004791259765625 0.01007843017578125 0.010101318359375 0.01012420654296875 0.0101470947265625 0.0101776123046875 0.01020050048828125 0.010223388671875 0.01024627685546875 0.01027679443359375 0.0102996826171875 0.01032257080078125 0.01035308837890625 0.0103759765625 0.01039886474609375 0.0104217529296875 0.0104522705078125 0.01047515869140625 0.010498046875 0.010528564453125 0.01055145263671875 0.0105743408203125 0.01059722900390625 0.01062774658203125 0.010650634765625 0.01067352294921875 0.0106964111328125 0.0107269287109375 0.01074981689453125 0.010772705078125 0.01080322265625 0.01082611083984375 0.0108489990234375 0.01087188720703125 0.01090240478515625 0.01092529296875 0.01094818115234375 0.01097869873046875 0.0110015869140625 0.01102447509765625 0.01104736328125 0.011077880859375 0.01110076904296875 0.0111236572265625 0.01114654541015625 0.01117706298828125 0.011199951171875 0.01122283935546875 0.01125335693359375 0.0112762451171875 0.01129913330078125 0.011322021484375 0.0113525390625 0.01137542724609375 0.0113983154296875 0.01142120361328125 0.01145172119140625 0.011474609375 0.01149749755859375 0.01152801513671875 0.0115509033203125 0.01157379150390625 0.0115966796875 0.011627197265625 0.01165008544921875 0.0116729736328125 0.0117034912109375 0.01172637939453125 0.011749267578125 0.01177215576171875 0.01180267333984375 0.0118255615234375 0.01184844970703125 0.011871337890625 0.01190185546875 0.01192474365234375 0.0119476318359375 0.0119781494140625 0.01200103759765625 0.01202392578125 0.01204681396484375 0.01207733154296875 0.0121002197265625 0.01212310791015625 0.01215362548828125 0.012176513671875 0.01219940185546875 0.0122222900390625 0.0122528076171875 0.01227569580078125 0.012298583984375 0.01232147216796875 0.01235198974609375 0.0123748779296875 0.01239776611328125 0.01242828369140625 0.012451171875 0.01247406005859375 0.0124969482421875 0.0125274658203125 0.01255035400390625 0.0125732421875 0.012603759765625 0.01262664794921875 0.0126495361328125 0.01267242431640625 0.01270294189453125 0.012725830078125 0.01274871826171875 0.0127716064453125 0.0128021240234375 0.01282501220703125 0.012847900390625 0.01287841796875 0.01290130615234375 0.0129241943359375 0.01294708251953125 0.01297760009765625 0.01300048828125 0.01302337646484375 0.0130462646484375 0.0130767822265625 0.01309967041015625 0.01312255859375 0.013153076171875 0.01317596435546875 0.0131988525390625 0.01322174072265625 0.01325225830078125 0.013275146484375 0.01329803466796875 0.01332855224609375 0.0133514404296875 0.01337432861328125 0.013397216796875 0.013427734375 0.01345062255859375 0.0134735107421875 0.01349639892578125 0.01352691650390625 0.0135498046875 0.01357269287109375 0.01360321044921875 0.0136260986328125 0.01364898681640625 0.013671875 0.013702392578125 0.01372528076171875 0.0137481689453125 0.0137786865234375 0.01380157470703125 0.013824462890625 0.01384735107421875 0.01387786865234375 
[test data payload omitted: a long run of whitespace-separated half-precision (fp16) float values, monotonically increasing from 0.0139 to 0.1262 with the step-wise repeats characteristic of fp16 quantization; the diff's per-file `+` markers and file boundaries were lost in extraction and cannot be reconstructed]
0.126220703125 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.12646484375 0.12646484375 0.12646484375 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.126708984375 0.126708984375 0.126708984375 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.126953125 0.126953125 0.126953125 0.126953125 0.126953125 0.126953125 0.126953125 0.126953125 0.1270751953125 0.1270751953125 0.1270751953125 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.1273193359375 0.1273193359375 0.1273193359375 0.12744140625 0.12744140625 0.12744140625 0.12744140625 0.12744140625 0.12744140625 0.12744140625 0.12744140625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.127685546875 0.127685546875 0.127685546875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1279296875 0.1279296875 0.1279296875 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.1282958984375 0.1282958984375 0.1282958984375 0.12841796875 0.12841796875 0.12841796875 0.12841796875 0.12841796875 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.128662109375 0.128662109375 0.128662109375 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.12890625 0.12890625 0.12890625 0.1290283203125 0.1290283203125 0.1290283203125 0.1290283203125 0.1290283203125 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.1292724609375 0.1292724609375 0.1292724609375 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.1295166015625 0.1295166015625 0.1295166015625 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1298828125 0.1298828125 0.1298828125 0.1300048828125 0.1300048828125 0.1300048828125 0.1300048828125 0.1300048828125 0.130126953125 0.130126953125 0.130126953125 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.1304931640625 0.1304931640625 0.1304931640625 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.1307373046875 0.1307373046875 0.1307373046875 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.131103515625 0.131103515625 0.131103515625 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.13134765625 0.1314697265625 0.1314697265625 0.1314697265625 0.131591796875 0.131591796875 0.131591796875 0.131591796875 0.131591796875 0.1317138671875 0.1317138671875 0.1317138671875 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.132080078125 0.132080078125 0.132080078125 
0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.13232421875 0.13232421875 0.13232421875 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.132568359375 0.132568359375 0.132568359375 0.132568359375 0.132568359375 0.1326904296875 0.1326904296875 0.1326904296875 0.1328125 0.1328125 0.1328125 0.1328125 0.1328125 0.1329345703125 0.1329345703125 0.1329345703125 0.133056640625 0.133056640625 0.133056640625 0.133056640625 0.133056640625 0.133056640625 0.133056640625 0.133056640625 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.13330078125 0.13330078125 0.13330078125 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.1336669921875 0.1336669921875 0.1336669921875 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1339111328125 0.1339111328125 0.1339111328125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.1341552734375 0.1341552734375 0.1341552734375 0.1341552734375 0.1341552734375 0.1341552734375 0.1341552734375 0.1341552734375 0.13427734375 0.13427734375 0.13427734375 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.134521484375 0.134521484375 0.134521484375 0.1346435546875 0.1346435546875 0.1346435546875 0.1346435546875 0.1346435546875 0.1346435546875 0.1346435546875 0.1346435546875 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.1348876953125 0.1348876953125 0.1348876953125 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.1351318359375 0.1351318359375 0.1351318359375 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.135498046875 0.135498046875 0.135498046875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1357421875 0.1357421875 0.1357421875 0.1357421875 0.1357421875 0.1357421875 0.1357421875 0.1357421875 0.1358642578125 0.1358642578125 0.1358642578125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.1361083984375 0.1361083984375 0.1361083984375 0.13623046875 0.13623046875 0.13623046875 0.13623046875 0.13623046875 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.136474609375 0.136474609375 0.136474609375 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.13671875 0.13671875 0.13671875 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.1370849609375 0.1370849609375 0.1370849609375 0.13720703125 0.13720703125 0.13720703125 0.13720703125 0.13720703125 0.1373291015625 0.1373291015625 0.1373291015625 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1376953125 0.1376953125 0.1376953125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 
0.137939453125 0.1380615234375 0.1380615234375 0.1380615234375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.1383056640625 0.1383056640625 0.1383056640625 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.138671875 0.138671875 0.138671875 0.1387939453125 0.1387939453125 0.1387939453125 0.1387939453125 0.1387939453125 0.138916015625 0.138916015625 0.138916015625 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.1392822265625 0.1392822265625 0.1392822265625 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.1395263671875 0.1395263671875 0.1395263671875 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1397705078125 0.1397705078125 0.1397705078125 0.1397705078125 0.1397705078125 0.139892578125 0.139892578125 0.139892578125 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.14013671875 0.14013671875 0.14013671875 0.1402587890625 0.1402587890625 0.1402587890625 0.1402587890625 0.1402587890625 0.1402587890625 0.1402587890625 0.1402587890625 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.1405029296875 0.1405029296875 0.1405029296875 0.140625 0.140625 0.140625 0.140625 0.140625 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.140869140625 0.140869140625 0.140869140625 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.14111328125 0.14111328125 0.14111328125 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.141357421875 0.141357421875 0.141357421875 0.141357421875 0.141357421875 0.141357421875 0.141357421875 0.141357421875 0.1414794921875 0.1414794921875 0.1414794921875 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1417236328125 0.1417236328125 0.1417236328125 0.141845703125 0.141845703125 0.141845703125 0.141845703125 0.141845703125 0.141845703125 0.141845703125 0.141845703125 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.14208984375 0.14208984375 0.14208984375 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.142333984375 0.142333984375 0.142333984375 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.142578125 0.142578125 0.142578125 0.142578125 0.142578125 0.1427001953125 0.1427001953125 0.1427001953125 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.1429443359375 0.1429443359375 0.1429443359375 0.1429443359375 0.1429443359375 0.1429443359375 0.1429443359375 0.1429443359375 0.14306640625 0.14306640625 0.14306640625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.143310546875 0.143310546875 0.143310546875 0.1434326171875 0.1434326171875 0.1434326171875 0.1434326171875 0.1434326171875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1436767578125 0.1436767578125 0.1436767578125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.1439208984375 0.1439208984375 0.1439208984375 0.14404296875 
0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.144287109375 0.144287109375 0.144287109375 0.1444091796875 0.1444091796875 0.1444091796875 0.1444091796875 0.1444091796875 0.14453125 0.14453125 0.14453125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.1448974609375 0.1448974609375 0.1448974609375 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.145263671875 0.145263671875 0.145263671875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1455078125 0.1455078125 0.1455078125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.1458740234375 0.1458740234375 0.1458740234375 0.14599609375 0.14599609375 0.14599609375 0.14599609375 0.14599609375 0.1461181640625 0.1461181640625 0.1461181640625 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.146484375 0.146484375 0.146484375 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.146728515625 0.146728515625 0.146728515625 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.14697265625 0.14697265625 0.14697265625 0.14697265625 0.14697265625 0.1470947265625 0.1470947265625 0.1470947265625 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.1473388671875 0.1473388671875 0.1473388671875 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1475830078125 0.1475830078125 0.1475830078125 0.1475830078125 0.1475830078125 0.147705078125 0.147705078125 0.147705078125 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.1480712890625 0.1480712890625 0.1480712890625 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.1483154296875 0.1483154296875 0.1483154296875 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1485595703125 0.1485595703125 0.1485595703125 0.1485595703125 0.1485595703125 0.1485595703125 0.1485595703125 0.1485595703125 0.148681640625 0.148681640625 0.148681640625 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.14892578125 0.14892578125 0.14892578125 0.1490478515625 0.1490478515625 0.1490478515625 0.1490478515625 0.1490478515625 0.1490478515625 0.1490478515625 0.1490478515625 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1495361328125 0.1495361328125 0.1495361328125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 
0.14990234375 0.14990234375 0.14990234375 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.150146484375 0.150146484375 0.150146484375 0.150146484375 0.150146484375 0.150146484375 0.150146484375 0.150146484375 0.1502685546875 0.1502685546875 0.1502685546875 0.150390625 0.150390625 0.150390625 0.150390625 0.150390625 0.1505126953125 0.1505126953125 0.1505126953125 0.150634765625 0.150634765625 0.150634765625 0.150634765625 0.150634765625 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.15087890625 0.15087890625 0.15087890625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.151123046875 0.151123046875 0.151123046875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1514892578125 0.1514892578125 0.1514892578125 0.151611328125 0.151611328125 0.151611328125 0.151611328125 0.151611328125 0.1517333984375 0.1517333984375 0.1517333984375 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.152099609375 0.152099609375 0.152099609375 0.1522216796875 0.1522216796875 0.1522216796875 0.1522216796875 0.1522216796875 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.1524658203125 0.1524658203125 0.1524658203125 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.1527099609375 0.1527099609375 0.1527099609375 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.153076171875 0.153076171875 0.153076171875 0.1531982421875 0.1531982421875 0.1531982421875 0.1531982421875 0.1531982421875 0.1533203125 0.1533203125 0.1533203125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.153564453125 0.153564453125 0.0 2.4974346160888672e-05 5.0008296966552734e-05 7.49826431274414e-05 0.00010001659393310547 0.00012505054473876953 0.0001499652862548828 0.00017499923706054688 0.00020003318786621094 0.00022494792938232422 0.00025010108947753906 0.00027489662170410156 0.0002999305725097656 0.0003249645233154297 0.00034999847412109375 0.0003750324249267578 0.0004000663757324219 0.00042510032653808594 0.00044989585876464844 0.0004749298095703125 0.0005002021789550781 0.0005249977111816406 0.0005497932434082031 0.0005750656127929688 0.0005998611450195312 0.0006251335144042969 0.0006499290466308594 0.000675201416015625 0.0006999969482421875 0.00072479248046875 0.0007500648498535156 0.0007748603820800781 0.0008001327514648438 0.0008249282836914062 0.0008502006530761719 0.0008749961853027344 0.0008997917175292969 0.0009250640869140625 0.000949859619140625 0.0009751319885253906 0.0010004043579101562 0.0010251998901367188 0.0010499954223632812 0.0010747909545898438 0.0010995864868164062 0.001125335693359375 0.0011501312255859375 0.0011749267578125 0.0011997222900390625 0.0012254714965820312 0.0012502670288085938 0.0012750625610351562 0.0012998580932617188 0.0013246536254882812 0.00135040283203125 0.0013751983642578125 0.001399993896484375 0.0014247894287109375 0.0014495849609375 0.0014753341674804688 0.0015001296997070312 
0.0015249252319335938 0.0015497207641601562 0.001575469970703125 0.0016002655029296875 0.00162506103515625 0.0016498565673828125 0.001674652099609375 0.0017004013061523438 0.0017251968383789062 0.0017499923706054688 0.0017747879028320312 0.0017995834350585938 0.0018253326416015625 0.001850128173828125 0.0018749237060546875 0.00189971923828125 0.0019254684448242188 0.0019502639770507812 0.0019741058349609375 0.0020008087158203125 0.002025604248046875 0.0020503997802734375 0.0020751953125 0.0020999908447265625 0.002124786376953125 0.0021495819091796875 0.00217437744140625 0.0021991729736328125 0.0022258758544921875 0.00225067138671875 0.0022754669189453125 0.002300262451171875 0.0023250579833984375 0.002349853515625 0.0023746490478515625 0.156005859375 0.156005859375 0.156005859375 0.1561279296875 0.1561279296875 0.1561279296875 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.15625 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.156494140625 0.156494140625 0.156494140625 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.15673828125 0.15673828125 0.15673828125 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.1571044921875 0.1571044921875 0.1571044921875 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.1573486328125 0.157470703125 0.157470703125 0.157470703125 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.15771484375 0.15771484375 0.15771484375 0.1578369140625 0.1578369140625 0.1578369140625 0.1578369140625 0.1578369140625 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.1580810546875 0.1580810546875 0.1580810546875 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.1583251953125 0.1583251953125 0.1583251953125 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.15869140625 0.15869140625 0.15869140625 0.1588134765625 0.1588134765625 0.1588134765625 0.1588134765625 0.1588134765625 0.158935546875 0.158935546875 0.158935546875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1593017578125 0.1593017578125 0.1593017578125 0.159423828125 0.159423828125 0.159423828125 0.159423828125 0.159423828125 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.15966796875 0.15966796875 0.15966796875 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.159912109375 0.159912109375 0.159912109375 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.1602783203125 0.1602783203125 0.1602783203125 0.160400390625 0.160400390625 0.160400390625 0.160400390625 0.160400390625 0.1605224609375 0.1605224609375 0.1605224609375 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 
0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.160888671875 0.160888671875 0.160888671875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1611328125 0.1611328125 0.1611328125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.161376953125 0.161376953125 0.161376953125 0.161376953125 0.161376953125 0.1614990234375 0.1614990234375 0.1614990234375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.161865234375 0.161865234375 0.161865234375 0.1619873046875 0.1619873046875 0.1619873046875 0.1619873046875 0.1619873046875 0.162109375 0.162109375 0.162109375 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.1624755859375 0.1624755859375 0.1624755859375 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.1627197265625 0.1627197265625 0.1627197265625 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.1629638671875 0.1629638671875 0.1629638671875 0.1629638671875 0.1629638671875 0.1630859375 0.1630859375 0.1630859375 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.163330078125 0.163330078125 0.163330078125 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.1634521484375 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.1636962890625 0.1636962890625 0.1636962890625 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1640625 0.1640625 0.1640625 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.164306640625 0.164306640625 0.164306640625 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.1644287109375 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.16455078125 0.1646728515625 0.1646728515625 0.1646728515625 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.164794921875 0.1649169921875 0.1649169921875 0.1649169921875 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1650390625 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.1651611328125 0.165283203125 0.165283203125 0.165283203125 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.1654052734375 0.16552734375 0.16552734375 0.16552734375 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.1656494140625 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.165771484375 0.1658935546875 0.1658935546875 0.1658935546875 0.166015625 0.166015625 0.166015625 0.166015625 0.166015625 0.1661376953125 0.1661376953125 0.1661376953125 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.166259765625 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.1663818359375 0.16650390625 0.16650390625 0.16650390625 0.1666259765625 0.1666259765625 0.1666259765625 
0.1666259765625 0.1666259765625 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.166748046875 0.1668701171875 0.1668701171875 0.1668701171875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1669921875 0.1671142578125 0.1671142578125 0.1671142578125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.167236328125 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.1673583984375 0.16748046875 0.16748046875 0.16748046875 0.1676025390625 0.1676025390625 0.1676025390625 0.1676025390625 0.1676025390625 0.167724609375 0.167724609375 0.167724609375 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.1678466796875 0.16796875 0.16796875 0.16796875 0.16796875 0.16796875 0.1680908203125 0.1680908203125 0.1680908203125 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.168212890625 0.1683349609375 0.1683349609375 0.1683349609375 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.16845703125 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.1685791015625 0.168701171875 0.168701171875 0.168701171875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1688232421875 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1689453125 0.1690673828125 0.1690673828125 0.1690673828125 0.169189453125 0.169189453125 0.169189453125 0.169189453125 0.169189453125 0.1693115234375 0.1693115234375 0.1693115234375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.16943359375 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.1695556640625 0.169677734375 0.169677734375 0.169677734375 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.1697998046875 0.169921875 0.169921875 0.169921875 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.1700439453125 0.170166015625 0.170166015625 0.170166015625 0.170166015625 0.170166015625 0.1702880859375 0.1702880859375 0.1702880859375 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.17041015625 0.1705322265625 0.1705322265625 0.1705322265625 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.170654296875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1707763671875 0.1708984375 0.1708984375 0.1708984375 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.1710205078125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.171142578125 0.1712646484375 0.1712646484375 0.1712646484375 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.17138671875 0.1715087890625 0.1715087890625 0.1715087890625 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.171630859375 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.1717529296875 0.171875 0.171875 0.171875 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.1719970703125 0.172119140625 0.172119140625 0.172119140625 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.1722412109375 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.17236328125 0.1724853515625 0.1724853515625 
0.1724853515625 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.172607421875 0.1727294921875 0.1727294921875 0.1727294921875 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1728515625 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.1729736328125 0.173095703125 0.173095703125 0.173095703125 0.1732177734375 0.1732177734375 0.1732177734375 0.1732177734375 0.1732177734375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.17333984375 0.1734619140625 0.1734619140625 0.1734619140625 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.173583984375 0.1737060546875 0.1737060546875 0.1737060546875 0.173828125 0.173828125 0.173828125 0.173828125 0.173828125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.1739501953125 0.174072265625 0.174072265625 0.174072265625 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.1741943359375 0.17431640625 0.17431640625 0.17431640625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.1744384765625 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.174560546875 0.1746826171875 0.1746826171875 0.1746826171875 0.1748046875 0.1748046875 0.1748046875 0.1748046875 0.1748046875 0.1749267578125 0.1749267578125 0.1749267578125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.175048828125 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.1751708984375 0.17529296875 0.17529296875 0.17529296875 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.1754150390625 0.175537109375 0.175537109375 0.175537109375 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.1756591796875 0.17578125 0.17578125 0.17578125 0.17578125 0.17578125 0.1759033203125 0.1759033203125 0.1759033203125 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.176025390625 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.1761474609375 0.17626953125 0.17626953125 0.17626953125 0.1763916015625 0.1763916015625 0.1763916015625 0.1763916015625 0.1763916015625 0.176513671875 0.176513671875 0.176513671875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1766357421875 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1767578125 0.1768798828125 0.1768798828125 0.1768798828125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.177001953125 0.1771240234375 0.1771240234375 0.1771240234375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.17724609375 0.1773681640625 0.1773681640625 0.1773681640625 0.1773681640625 0.1773681640625 0.177490234375 0.177490234375 0.177490234375 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.1776123046875 0.177734375 0.177734375 0.177734375 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.1778564453125 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.177978515625 0.1781005859375 0.1781005859375 0.1781005859375 0.17822265625 0.17822265625 0.17822265625 0.17822265625 0.17822265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 0.1783447265625 
0.1783447265625 0.1783447265625 0.178466796875 0.178466796875 0.178466796875 0.1785888671875 0.1785888671875 0.1785888671875 0.1785888671875 0.1785888671875 0.1787109375 0.1787109375 0.1787109375 0.1788330078125 0.1788330078125 0.1788330078125 0.1788330078125 0.1788330078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.178955078125 0.1790771484375 0.1790771484375 0.1790771484375 0.17919921875 0.17919921875 0.17919921875 0.17919921875 0.17919921875 0.1793212890625 0.1793212890625 0.1793212890625 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.179443359375 0.1795654296875 0.1795654296875 0.1795654296875 0.1795654296875 0.1795654296875 0.1796875 0.1796875 0.1796875 0.1798095703125 0.1798095703125 0.1798095703125 0.1798095703125 0.1798095703125 0.179931640625 0.179931640625 0.179931640625 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.1800537109375 0.18017578125 0.18017578125 0.18017578125 0.18017578125 0.18017578125 0.1802978515625 0.1802978515625 0.1802978515625 0.180419921875 0.180419921875 0.180419921875 0.180419921875 0.180419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1805419921875 0.1806640625 0.1806640625 0.1806640625 0.1807861328125 0.1807861328125 0.1807861328125 0.1807861328125 0.1807861328125 0.180908203125 0.180908203125 0.180908203125 0.1810302734375 0.1810302734375 0.1810302734375 0.1810302734375 0.1810302734375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.18115234375 0.1812744140625 0.1812744140625 0.1812744140625 0.181396484375 0.181396484375 0.181396484375 0.181396484375 0.181396484375 0.1815185546875 0.1815185546875 0.1815185546875 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.181640625 0.1817626953125 0.1817626953125 0.1817626953125 0.1817626953125 0.1817626953125 0.181884765625 0.181884765625 0.181884765625 0.1820068359375 0.1820068359375 0.1820068359375 0.1820068359375 0.1820068359375 0.18212890625 0.18212890625 0.18212890625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.1822509765625 0.182373046875 0.182373046875 0.182373046875 0.182373046875 0.182373046875 0.1824951171875 0.1824951171875 0.1824951171875 0.1826171875 0.1826171875 0.1826171875 0.1826171875 0.1826171875 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.1827392578125 0.182861328125 0.182861328125 0.182861328125 0.1829833984375 0.1829833984375 0.1829833984375 0.1829833984375 0.1829833984375 0.18310546875 0.18310546875 0.18310546875 0.1832275390625 0.1832275390625 0.1832275390625 0.1832275390625 0.1832275390625 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.183349609375 0.1834716796875 0.1834716796875 0.1834716796875 0.18359375 0.18359375 0.18359375 0.18359375 0.18359375 0.1837158203125 0.1837158203125 0.1837158203125 0.183837890625 0.183837890625 0.183837890625 0.183837890625 0.183837890625 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.1839599609375 0.18408203125 0.18408203125 0.18408203125 0.1842041015625 0.1842041015625 0.1842041015625 0.1842041015625 0.1842041015625 0.184326171875 0.184326171875 
0.184326171875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1844482421875 0.1845703125 0.1845703125 0.1845703125 0.1845703125 0.1845703125 0.1846923828125 0.1846923828125 0.1846923828125 0.184814453125 0.184814453125 0.184814453125 0.184814453125 0.184814453125 0.1849365234375 0.1849365234375 0.1849365234375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.18505859375 0.1851806640625 0.1851806640625 0.1851806640625 0.1851806640625 0.1851806640625 0.185302734375 0.185302734375 0.185302734375 0.1854248046875 0.1854248046875 0.1854248046875 0.1854248046875 0.1854248046875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.185546875 0.1856689453125 0.1856689453125 0.1856689453125 0.185791015625 0.185791015625 0.185791015625 0.185791015625 0.185791015625 0.1859130859375 0.1859130859375 0.1859130859375 0.18603515625 0.18603515625 0.18603515625 0.18603515625 0.18603515625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.1861572265625 0.186279296875 0.186279296875 0.186279296875 0.1864013671875 0.1864013671875 0.1864013671875 0.1864013671875 0.1864013671875 0.1865234375 0.1865234375 0.1865234375 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.1866455078125 0.186767578125 0.186767578125 0.186767578125 0.186767578125 0.186767578125 0.1868896484375 0.1868896484375 0.1868896484375 0.18701171875 0.18701171875 0.18701171875 0.18701171875 0.18701171875 0.1871337890625 0.1871337890625 0.1871337890625 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.187255859375 0.1873779296875 0.1873779296875 0.1873779296875 0.1873779296875 0.1873779296875 0.1875 0.1875 0.1875 0.1876220703125 0.1876220703125 0.1876220703125 0.1876220703125 0.1876220703125 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.187744140625 0.1878662109375 0.1878662109375 0.1878662109375 0.18798828125 0.18798828125 0.18798828125 0.18798828125 0.18798828125 0.1881103515625 0.1881103515625 0.1881103515625 0.188232421875 0.188232421875 0.188232421875 0.188232421875 0.188232421875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1883544921875 0.1884765625 0.1884765625 0.1884765625 0.1885986328125 0.1885986328125 0.1885986328125 0.1885986328125 0.1885986328125 0.188720703125 0.188720703125 0.188720703125 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.1888427734375 0.18896484375 0.18896484375 0.18896484375 0.18896484375 0.18896484375 0.1890869140625 0.1890869140625 0.1890869140625 0.189208984375 0.189208984375 0.189208984375 0.189208984375 0.189208984375 0.1893310546875 0.1893310546875 0.1893310546875 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.189453125 0.1895751953125 0.1895751953125 0.1895751953125 0.1895751953125 0.1895751953125 0.189697265625 0.189697265625 0.189697265625 0.1898193359375 0.1898193359375 0.1898193359375 0.1898193359375 0.1898193359375 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.18994140625 0.1900634765625 0.1900634765625 0.1900634765625 0.190185546875 0.190185546875 0.190185546875 0.190185546875 0.190185546875 
0.1903076171875 0.1903076171875 0.1903076171875 0.1904296875 0.1904296875 0.1904296875 0.1904296875 0.1904296875 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.1905517578125 0.190673828125 0.190673828125 0.190673828125 0.1907958984375 0.1907958984375 0.1907958984375 0.1907958984375 0.1907958984375 0.19091796875 0.19091796875 0.19091796875 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.1910400390625 0.191162109375 0.191162109375 0.191162109375 0.191162109375 0.191162109375 0.1912841796875 0.1912841796875 0.1912841796875 0.19140625 0.19140625 0.19140625 0.19140625 0.19140625 0.1915283203125 0.1915283203125 0.1915283203125 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.191650390625 0.1917724609375 0.1917724609375 0.1917724609375 0.1917724609375 0.1917724609375 0.19189453125 0.19189453125 0.19189453125 0.1920166015625 0.1920166015625 0.1920166015625 0.1920166015625 0.1920166015625 0.192138671875 0.192138671875 0.192138671875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1922607421875 0.1923828125 0.1923828125 0.1923828125 0.1923828125 0.1923828125 0.1925048828125 0.1925048828125 0.1925048828125 0.192626953125 0.192626953125 0.192626953125 0.192626953125 0.192626953125 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.1927490234375 0.19287109375 0.19287109375 0.19287109375 0.1929931640625 0.1929931640625 0.1929931640625 0.1929931640625 0.1929931640625 0.193115234375 0.193115234375 0.193115234375 0.1932373046875 0.1932373046875 0.1932373046875 0.1932373046875 0.1932373046875 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.193359375 0.1934814453125 0.1934814453125 0.1934814453125 0.193603515625 0.193603515625 0.193603515625 0.193603515625 0.193603515625 0.1937255859375 0.1937255859375 0.1937255859375 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.19384765625 0.1939697265625 0.1939697265625 0.1939697265625 0.1939697265625 0.1939697265625 0.194091796875 0.194091796875 0.194091796875 0.1942138671875 0.1942138671875 0.1942138671875 0.1942138671875 0.1942138671875 0.1943359375 0.1943359375 0.1943359375 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.1944580078125 0.194580078125 0.194580078125 0.194580078125 0.194580078125 0.194580078125 0.1947021484375 0.1947021484375 0.1947021484375 0.19482421875 0.19482421875 0.19482421875 0.19482421875 0.19482421875 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.1949462890625 0.195068359375 0.195068359375 0.195068359375 0.1951904296875 0.1951904296875 0.1951904296875 0.1951904296875 0.1951904296875 0.1953125 0.1953125 0.1953125 0.1954345703125 0.1954345703125 0.1954345703125 0.1954345703125 0.1954345703125 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.195556640625 0.1956787109375 0.1956787109375 0.1956787109375 0.19580078125 0.19580078125 0.19580078125 0.19580078125 0.19580078125 0.1959228515625 0.1959228515625 0.1959228515625 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.196044921875 0.1961669921875 
0.1961669921875 0.1961669921875 0.1961669921875 0.1961669921875 0.1962890625 0.1962890625 0.1962890625 0.1964111328125 0.1964111328125 0.1964111328125 0.1964111328125 0.1964111328125 0.196533203125 0.196533203125 0.196533203125 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.1966552734375 0.19677734375 0.19677734375 0.19677734375 0.19677734375 0.19677734375 0.1968994140625 0.1968994140625 0.1968994140625 0.197021484375 0.197021484375 0.197021484375 0.197021484375 0.197021484375 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.1971435546875 0.197265625 0.197265625 0.197265625 0.1973876953125 0.1973876953125 0.1973876953125 0.1973876953125 0.1973876953125 0.197509765625 0.197509765625 0.197509765625 0.1976318359375 0.1976318359375 0.1976318359375 0.1976318359375 0.1976318359375 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.19775390625 0.1978759765625 0.1978759765625 0.1978759765625 0.197998046875 0.197998046875 0.197998046875 0.197998046875 0.197998046875 0.1981201171875 0.1981201171875 0.1981201171875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1982421875 0.1983642578125 0.1983642578125 0.1983642578125 0.1983642578125 0.1983642578125 0.198486328125 0.198486328125 0.198486328125 0.1986083984375 0.1986083984375 0.1986083984375 0.1986083984375 0.1986083984375 0.19873046875 0.19873046875 0.19873046875 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.1988525390625 0.198974609375 0.198974609375 0.198974609375 0.198974609375 0.198974609375 0.1990966796875 0.1990966796875 0.1990966796875 0.19921875 0.19921875 0.19921875 0.19921875 0.19921875 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.1993408203125 0.199462890625 0.199462890625 0.199462890625 0.1995849609375 0.1995849609375 0.1995849609375 0.1995849609375 0.1995849609375 0.19970703125 0.19970703125 0.19970703125 0.1998291015625 0.1998291015625 0.1998291015625 0.1998291015625 0.1998291015625 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.199951171875 0.2000732421875 0.2000732421875 0.2000732421875 0.2001953125 0.2001953125 0.2001953125 0.2001953125 0.2001953125 0.2003173828125 0.2003173828125 0.2003173828125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.200439453125 0.2005615234375 0.2005615234375 0.2005615234375 0.2005615234375 0.2005615234375 0.20068359375 0.20068359375 0.20068359375 0.2008056640625 0.2008056640625 0.2008056640625 0.2008056640625 0.2008056640625 0.200927734375 0.200927734375 0.200927734375 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.2010498046875 0.201171875 0.201171875 0.201171875 0.201171875 0.201171875 0.2012939453125 0.2012939453125 0.2012939453125 0.201416015625 0.201416015625 0.201416015625 0.201416015625 0.201416015625 0.2015380859375 0.2015380859375 0.2015380859375 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.20166015625 0.2017822265625 0.2017822265625 0.2017822265625 0.2017822265625 0.2017822265625 0.201904296875 0.201904296875 0.201904296875 0.2020263671875 0.2020263671875 0.2020263671875 0.2020263671875 
0.2020263671875 0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2021484375 0.2022705078125 0.2022705078125 0.2022705078125 0.202392578125 0.202392578125 0.202392578125 0.202392578125 0.202392578125 0.2025146484375 0.2025146484375 0.2025146484375 0.20263671875 0.20263671875 0.20263671875 0.20263671875 0.20263671875 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.2027587890625 0.202880859375 0.202880859375 0.202880859375 0.2030029296875 0.2030029296875 0.2030029296875 0.2030029296875 0.2030029296875 0.203125 0.203125 0.203125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.2032470703125 0.203369140625 0.203369140625 0.203369140625 0.203369140625 0.203369140625 0.2034912109375 0.2034912109375 0.2034912109375 0.20361328125 0.20361328125 0.20361328125 0.20361328125 0.20361328125 0.2037353515625 0.2037353515625 0.2037353515625 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.203857421875 0.2039794921875 0.2039794921875 0.2039794921875 0.2039794921875 0.2039794921875 0.2041015625 0.2041015625 0.2041015625 0.2042236328125 0.2042236328125 0.2042236328125 0.2042236328125 0.2042236328125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.204345703125 0.2044677734375 0.2044677734375 0.2044677734375 0.20458984375 0.20458984375 0.20458984375 0.20458984375 0.20458984375 0.2047119140625 0.2047119140625 0.2047119140625 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.204833984375 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2049560546875 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2052001953125 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.2054443359375 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.20556640625 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.205810546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2060546875 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2061767578125 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.2064208984375 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.20654296875 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.206787109375 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.20703125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2071533203125 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2073974609375 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.2076416015625 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.207763671875 0.2080078125 0.2080078125 0.2080078125 
0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.2080078125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.208251953125 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2083740234375 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.2086181640625 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208740234375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.208984375 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.209228515625 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2093505859375 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2095947265625 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2098388671875 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.2099609375 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.210205078125 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.21044921875 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2105712890625 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2108154296875 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.2110595703125 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.211181640625 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.21142578125 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2115478515625 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2117919921875 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.2120361328125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.212158203125 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.21240234375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.212646484375 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2127685546875 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2130126953125 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.2132568359375 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.21337890625 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.213623046875 0.2137451171875 0.2137451171875 0.2137451171875 0.2137451171875 
0.2137451171875 0.2137451171875 0.2137451171875 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2139892578125 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.2142333984375 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.21435546875 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.214599609375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.21484375 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2149658203125 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2152099609375 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.2154541015625 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.215576171875 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2158203125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2159423828125 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2161865234375 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.2164306640625 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216552734375 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.216796875 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.217041015625 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2171630859375 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2174072265625 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2176513671875 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.2177734375 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.218017578125 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2181396484375 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2183837890625 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.2186279296875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.21875 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.218994140625 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.21923828125 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2193603515625 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2196044921875 0.2198486328125 0.2198486328125 0.2198486328125 
0.2198486328125 0.2198486328125 0.2198486328125 0.2198486328125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.219970703125 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.22021484375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.220458984375 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2205810546875 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.2208251953125 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.220947265625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.22119140625 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.221435546875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2215576171875 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2218017578125 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.2220458984375 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.22216796875 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.222412109375 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.22265625 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2227783203125 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.2230224609375 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.22314453125 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.223388671875 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2236328125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2237548828125 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2239990234375 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.2242431640625 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224365234375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224609375 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.224853515625 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2249755859375 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.2252197265625 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.225341796875 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.2255859375 0.225830078125 
0.225830078125 0.225830078125 0.225830078125 0.225830078125 0.225830078125 0.225830078125 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2259521484375 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2261962890625 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2264404296875 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.2265625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.226806640625 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.22705078125 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2271728515625 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2274169921875 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.2275390625 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.227783203125 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.22802734375 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2281494140625 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2283935546875 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.2286376953125 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.228759765625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.22900390625 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.229248046875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2293701171875 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2296142578125 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.2298583984375 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.22998046875 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.230224609375 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2303466796875 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2305908203125 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.2308349609375 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.23095703125 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.231201171875 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2314453125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 0.2315673828125 
0.2315673828125 0.2315673828125 0.2315673828125 0.2318115234375 0.2318115234375 0.2318115234375 0.2318115234375 0.2318115234375 0.2318115234375 0.2318115234375 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.2320556640625 0.232177734375 0.232177734375 0.232177734375 0.232177734375 0.232177734375 0.232177734375 0.232177734375 0.232421875 0.232421875 0.232421875 0.232421875 0.232421875 0.232421875 0.232421875 0.232421875 0.232421875 0.2325439453125 0.2325439453125 0.2325439453125 0.2325439453125 0.2325439453125 0.2325439453125 0.2325439453125 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2327880859375 0.2330322265625 0.2330322265625 0.2330322265625 0.2330322265625 0.2330322265625 0.2330322265625 0.2330322265625 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.233154296875 0.2333984375 0.2333984375 0.2333984375 0.2333984375 0.2333984375 0.2333984375 0.2333984375 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.233642578125 0.2337646484375 0.2337646484375 0.2337646484375 0.2337646484375 0.2337646484375 0.2337646484375 0.2337646484375 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2340087890625 0.2342529296875 0.2342529296875 0.2342529296875 0.2342529296875 0.2342529296875 0.2342529296875 0.2342529296875 0.234375 0.234375 0.234375 0.234375 0.234375 0.234375 0.234375 0.234375 0.234375 0.234619140625 0.234619140625 0.234619140625 0.234619140625 0.234619140625 0.234619140625 0.234619140625 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2347412109375 0.2349853515625 0.2349853515625 0.2349853515625 0.2349853515625 0.2349853515625 0.2349853515625 0.2349853515625 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2352294921875 0.2353515625 0.2353515625 0.2353515625 0.2353515625 0.2353515625 0.2353515625 0.2353515625 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.235595703125 0.23583984375 0.23583984375 0.23583984375 0.23583984375 0.23583984375 0.23583984375 0.23583984375 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2359619140625 0.2362060546875 0.2362060546875 0.2362060546875 0.2362060546875 0.2362060546875 0.2362060546875 0.2362060546875 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.2364501953125 0.236572265625 0.236572265625 0.236572265625 0.236572265625 0.236572265625 0.236572265625 0.236572265625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.23681640625 0.237060546875 0.237060546875 0.237060546875 0.237060546875 0.237060546875 0.237060546875 0.237060546875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2371826171875 0.2374267578125 0.2374267578125 0.2374267578125 0.2374267578125 0.2374267578125 0.2374267578125 0.2374267578125 0.237548828125 0.237548828125 
0.237548828125 0.237548828125 0.237548828125 0.237548828125 0.237548828125 0.237548828125 0.237548828125 0.23779296875 0.23779296875 0.23779296875 0.23779296875 0.23779296875 0.23779296875 0.23779296875 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.238037109375 0.2381591796875 0.2381591796875 0.2381591796875 0.2381591796875 0.2381591796875 0.2381591796875 0.2381591796875 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2384033203125 0.2386474609375 0.2386474609375 0.2386474609375 0.2386474609375 0.2386474609375 0.2386474609375 0.2386474609375 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.23876953125 0.239013671875 0.239013671875 0.239013671875 0.239013671875 0.239013671875 0.239013671875 0.239013671875 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2392578125 0.2393798828125 0.2393798828125 0.2393798828125 0.2393798828125 0.2393798828125 0.2393798828125 0.2393798828125 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.2396240234375 0.23974609375 0.23974609375 0.23974609375 0.23974609375 0.23974609375 0.23974609375 0.23974609375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.239990234375 0.240234375 0.240234375 0.240234375 0.240234375 0.240234375 0.240234375 0.240234375 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2403564453125 0.2406005859375 0.2406005859375 0.2406005859375 0.2406005859375 0.2406005859375 0.2406005859375 0.2406005859375 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.2408447265625 0.240966796875 0.240966796875 0.240966796875 0.240966796875 0.240966796875 0.240966796875 0.240966796875 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.2412109375 0.241455078125 0.241455078125 0.241455078125 0.241455078125 0.241455078125 0.241455078125 0.241455078125 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2415771484375 0.2418212890625 0.2418212890625 0.2418212890625 0.2418212890625 0.2418212890625 0.2418212890625 0.2418212890625 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.241943359375 0.2421875 0.2421875 0.2421875 0.2421875 0.2421875 0.2421875 0.2421875 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.242431640625 0.2425537109375 0.2425537109375 0.2425537109375 0.2425537109375 0.2425537109375 0.2425537109375 0.2425537109375 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2427978515625 0.2430419921875 0.2430419921875 0.2430419921875 0.2430419921875 0.2430419921875 0.2430419921875 0.2430419921875 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.2431640625 0.243408203125 0.243408203125 0.243408203125 0.243408203125 0.243408203125 0.243408203125 0.243408203125 0.24365234375 
0.24365234375 0.24365234375 0.24365234375 0.24365234375 0.24365234375 0.24365234375 0.24365234375 0.24365234375 0.2437744140625 0.2437744140625 0.2437744140625 0.2437744140625 0.2437744140625 0.2437744140625 0.2437744140625 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.2440185546875 0.244140625 0.244140625 0.244140625 0.244140625 0.244140625 0.244140625 0.244140625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.244384765625 0.24462890625 0.24462890625 0.24462890625 0.24462890625 0.24462890625 0.24462890625 0.24462890625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2447509765625 0.2449951171875 0.2449951171875 0.2449951171875 0.2449951171875 0.2449951171875 0.2449951171875 0.2449951171875 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.2452392578125 0.245361328125 0.245361328125 0.245361328125 0.245361328125 0.245361328125 0.245361328125 0.245361328125 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.24560546875 0.245849609375 0.245849609375 0.245849609375 0.245849609375 0.245849609375 0.245849609375 0.245849609375 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2459716796875 0.2462158203125 0.2462158203125 0.2462158203125 0.2462158203125 0.2462158203125 0.2462158203125 0.2462158203125 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.2464599609375 0.24658203125 0.24658203125 0.24658203125 0.24658203125 0.24658203125 0.24658203125 0.24658203125 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.246826171875 0.2469482421875 0.2469482421875 0.2469482421875 0.2469482421875 0.2469482421875 0.2469482421875 0.2469482421875 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2471923828125 0.2474365234375 0.2474365234375 0.2474365234375 0.2474365234375 0.2474365234375 0.2474365234375 0.2474365234375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.24755859375 0.247802734375 0.247802734375 0.247802734375 0.247802734375 0.247802734375 0.247802734375 0.247802734375 0.248046875 0.248046875 0.248046875 0.248046875 0.248046875 0.248046875 0.248046875 0.248046875 0.248046875 0.2481689453125 0.2481689453125 0.2481689453125 0.2481689453125 0.2481689453125 0.2481689453125 0.2481689453125 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2484130859375 0.2486572265625 0.2486572265625 0.2486572265625 0.2486572265625 0.2486572265625 0.2486572265625 0.2486572265625 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.248779296875 0.2490234375 0.2490234375 0.2490234375 0.2490234375 0.2490234375 0.2490234375 0.2490234375 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2491455078125 0.2493896484375 0.2493896484375 0.2493896484375 
0.2493896484375 0.2493896484375 0.2493896484375 0.2493896484375 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.2496337890625 0.249755859375 0.249755859375 0.249755859375 0.249755859375 0.249755859375 0.249755859375 0.249755859375 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.250244140625 0.250244140625 0.250244140625 0.250244140625 0.250244140625 0.250244140625 0.250244140625 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.25048828125 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.250732421875 0.2509765625 0.2509765625 0.2509765625 0.2509765625 0.2509765625 0.2509765625 0.2509765625 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.251220703125 0.25146484375 0.25146484375 0.25146484375 0.25146484375 0.25146484375 0.25146484375 0.25146484375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251708984375 0.251953125 0.251953125 0.251953125 0.251953125 0.251953125 0.251953125 0.251953125 0.251953125 0.251953125 0.252197265625 0.252197265625 0.252197265625 0.252197265625 0.252197265625 0.252197265625 0.252197265625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.25244140625 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.252685546875 0.2529296875 0.2529296875 0.2529296875 0.2529296875 0.2529296875 0.2529296875 0.2529296875 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.253173828125 0.25341796875 0.25341796875 0.25341796875 0.25341796875 0.25341796875 0.25341796875 0.25341796875 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.253662109375 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.25390625 0.254150390625 0.254150390625 0.254150390625 0.254150390625 0.254150390625 0.254150390625 0.254150390625 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.25439453125 0.254638671875 0.254638671875 0.254638671875 0.254638671875 0.254638671875 0.254638671875 0.254638671875 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.2548828125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.255126953125 0.25537109375 0.25537109375 0.25537109375 0.25537109375 0.25537109375 0.25537109375 0.25537109375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255615234375 0.255859375 0.255859375 0.255859375 
0.255859375 0.255859375 0.255859375 0.255859375 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.256103515625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.25634765625 0.256591796875 0.256591796875 0.256591796875 0.256591796875 0.256591796875 0.256591796875 0.256591796875 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.2568359375 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.257080078125 0.25732421875 0.25732421875 0.25732421875 0.25732421875 0.25732421875 0.25732421875 0.25732421875 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.257568359375 0.2578125 0.2578125 0.2578125 0.2578125 0.2578125 0.2578125 0.2578125 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.258056640625 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.25830078125 0.258544921875 0.258544921875 0.258544921875 0.258544921875 0.258544921875 0.258544921875 0.258544921875 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.2587890625 0.259033203125 0.259033203125 0.259033203125 0.259033203125 0.259033203125 0.259033203125 0.259033203125 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.25927734375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259521484375 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 
0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 
0.268310546875 0.268310546875 0.268310546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.27490234375 0.27490234375 0.27490234375 
0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 
0.281005859375 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 
0.287353515625 0.287353515625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 
0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 
0.300048828125 0.300048828125 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 
0.306396484375 0.306396484375 0.306396484375 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 
0.312744140625 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 
0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 
0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.32568359375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.325927734375 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326171875 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.326416015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.32666015625 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.326904296875 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.3271484375 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.327392578125 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.32763671875 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.327880859375 0.328125 0.328125 0.328125 0.328125 0.328125 0.328125 0.328125 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.328369140625 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.32861328125 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.328857421875 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.3291015625 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.329345703125 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.32958984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.329833984375 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330078125 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.330322265625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.33056640625 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.330810546875 0.3310546875 0.3310546875 0.3310546875 0.3310546875 0.3310546875 0.3310546875 0.3310546875 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.331298828125 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.33154296875 0.331787109375 0.331787109375 0.331787109375 
0.331787109375 0.331787109375 0.331787109375 0.331787109375 0.33203125 0.33203125 0.33203125 0.33203125 0.33203125 0.33203125 0.33203125 0.33203125 0.33203125 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.332275390625 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.33251953125 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.332763671875 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.3330078125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.333251953125 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.33349609375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333740234375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.333984375 0.334228515625 0.334228515625 0.334228515625 0.334228515625 0.334228515625 0.334228515625 0.334228515625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.33447265625 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.334716796875 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.3349609375 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.335205078125 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.33544921875 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.335693359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.3359375 0.336181640625 0.336181640625 0.336181640625 0.336181640625 0.336181640625 0.336181640625 0.336181640625 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.33642578125 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.336669921875 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.3369140625 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.337158203125 0.33740234375 0.33740234375 0.33740234375 0.33740234375 0.33740234375 0.33740234375 0.33740234375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337646484375 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.337890625 0.338134765625 0.338134765625 
0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.338134765625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.33837890625 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.338623046875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.3388671875 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.339111328125 0.33935546875 0.33935546875 0.33935546875 0.33935546875 0.33935546875 0.33935546875 0.33935546875 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.339599609375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.33984375 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.340087890625 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.34033203125 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.340576171875 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.3408203125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.341064453125 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.34130859375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341552734375 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.341796875 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.342041015625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.34228515625 0.342529296875 0.342529296875 0.342529296875 0.342529296875 0.342529296875 0.342529296875 0.342529296875 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.3427734375 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.343017578125 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.34326171875 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.343505859375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.34375 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.343994140625 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.34423828125 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 
0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.344482421875 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.3447265625 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.344970703125 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.34521484375 0.345458984375 0.345458984375 0.345458984375 0.345458984375 0.345458984375 0.345458984375 0.345458984375 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345703125 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.345947265625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.34619140625 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.346435546875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.3466796875 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.346923828125 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.34716796875 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.347412109375 0.34765625 0.34765625 0.34765625 0.34765625 0.34765625 0.34765625 0.34765625 0.34765625 0.34765625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.347900390625 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.34814453125 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.348388671875 0.3486328125 0.3486328125 0.3486328125 0.3486328125 0.3486328125 0.3486328125 0.3486328125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.348876953125 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.34912109375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349365234375 0.349609375 0.349609375 0.349609375 0.349609375 0.349609375 0.349609375 0.349609375 0.349609375 0.349609375 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.349853515625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.35009765625 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.350341796875 0.3505859375 0.3505859375 0.3505859375 0.3505859375 0.3505859375 0.3505859375 0.3505859375 0.350830078125 0.350830078125 0.350830078125 0.350830078125 
0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.350830078125 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.35107421875 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.351318359375 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.3515625 0.351806640625 0.351806640625 0.351806640625 0.351806640625 0.351806640625 0.351806640625 0.351806640625 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.35205078125 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.352294921875 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.3525390625 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.352783203125 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.35302734375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353271484375 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353515625 0.353759765625 0.353759765625 0.353759765625 0.353759765625 0.353759765625 0.353759765625 0.353759765625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.35400390625 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.354248046875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.3544921875 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.354736328125 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.35498046875 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.355224609375 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.35546875 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.355712890625 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.35595703125 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.356201171875 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.3564453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.356689453125 0.35693359375 0.35693359375 0.35693359375 0.35693359375 0.35693359375 0.35693359375 0.35693359375 0.357177734375 
0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357177734375 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357421875 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.357666015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.35791015625 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.358154296875 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.3583984375 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.358642578125 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.35888671875 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359130859375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359375 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.359619140625 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.35986328125 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.360107421875 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.3603515625 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.360595703125 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.36083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361083984375 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361328125 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.361572265625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.36181640625 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.362060546875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.3623046875 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.362548828125 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.36279296875 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.363037109375 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 0.36328125 
0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.363525390625 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.36376953125 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.364013671875 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.3642578125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.364501953125 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.36474609375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.364990234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365234375 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.365478515625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.36572265625 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.365966796875 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.3662109375 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.366455078125 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.36669921875 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.366943359375 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.3671875 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.367431640625 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.36767578125 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.367919921875 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.3681640625 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.368408203125 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.36865234375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.368896484375 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369140625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.369384765625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.36962890625 0.369873046875 
0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.369873046875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.3701171875 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.370361328125 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.37060546875 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.370849609375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.37109375 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.371337890625 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.37158203125 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.371826171875 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.3720703125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.372314453125 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.37255859375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.372802734375 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373046875 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.373291015625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.37353515625 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.373779296875 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.3740234375 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.374267578125 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.37451171875 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.374755859375 0.375 0.375 0.375 0.375 0.375 0.375 0.375 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.375244140625 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.37548828125 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.375732421875 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.3759765625 0.376220703125 
0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.376220703125 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.37646484375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376708984375 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.376953125 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.377197265625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.37744140625 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.377685546875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.3779296875 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.378173828125 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.37841796875 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.378662109375 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.37890625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.379150390625 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.37939453125 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.379638671875 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.3798828125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.380126953125 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.38037109375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380615234375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.380859375 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.381103515625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.38134765625 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.381591796875 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.3818359375 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.382080078125 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 0.38232421875 
0.38232421875 0.38232421875 0.38232421875 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.382568359375 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.3828125 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.383056640625 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.38330078125 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.383544921875 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.3837890625 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.384033203125 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.38427734375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384521484375 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.384765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.385009765625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.38525390625 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.385498046875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.3857421875 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.385986328125 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.38623046875 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.386474609375 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.38671875 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.386962890625 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.38720703125 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.387451171875 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.3876953125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.387939453125 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.38818359375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388427734375 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388671875 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 
0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.388916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.38916015625 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.389404296875 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.3896484375 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.389892578125 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.39013671875 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390380859375 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.390869140625 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.39111328125 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.391357421875 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.3916015625 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.391845703125 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.39208984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392333984375 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392578125 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.392822265625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.39306640625 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.393310546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.3935546875 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.393798828125 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.39404296875 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.394287109375 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.39453125 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.394775390625 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 0.39501953125 
0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.395263671875 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.3955078125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.395751953125 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.39599609375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396240234375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396484375 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.396728515625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.39697265625 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.397216796875 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.3974609375 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.397705078125 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.39794921875 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.398193359375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.3984375 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.398681640625 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.39892578125 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.399169921875 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.3994140625 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.399658203125 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.39990234375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400146484375 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400390625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.400634765625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.40087890625 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.401123046875 0.4013671875 0.4013671875 0.4013671875 0.4013671875 
0.4013671875 0.4013671875 0.4013671875 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.401611328125 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.40185546875 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.402099609375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.40234375 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.402587890625 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.40283203125 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.403076171875 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.4033203125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.403564453125 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.40380859375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404052734375 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404296875 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.404541015625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.40478515625 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.405029296875 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.4052734375 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.405517578125 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.40576171875 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.406005859375 0.40625 0.40625 0.40625 0.40625 0.40625 0.40625 0.40625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.406494140625 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.40673828125 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.406982421875 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.4072265625 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.407470703125 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 
0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.40771484375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.407958984375 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408203125 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.408447265625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.40869140625 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.408935546875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.4091796875 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.409423828125 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.40966796875 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.409912109375 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410400390625 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.410888671875 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.4111328125 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.41162109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412109375 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412353515625 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.412841796875 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.4130859375 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.41357421875 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 0.4140625 
0.4140625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414306640625 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.414794921875 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.415283203125 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.41552734375 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.416015625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.41650390625 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.416748046875 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.417236328125 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41748046875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41796875 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.41845703125 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.418701171875 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419189453125 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419677734375 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.419921875 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.42041015625 
0.42041015625 0.42041015625 0.42041015625 0.42041015625 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.4208984375 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421142578125 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.421630859375 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.422119140625 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.42236328125 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.4228515625 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423095703125 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.423583984375 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.424072265625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.42431640625 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.4248046875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.42529296875 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.425537109375 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.4267578125 0.4267578125 
0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.4267578125 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.427978515625 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.429931640625 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.430908203125 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 
0.432861328125 0.432861328125 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43310546875 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.43408203125 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.43603515625 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.437255859375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439208984375 0.439697265625 0.439697265625 
0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.4423828125 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.4443359375 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.4453125 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 
0.445556640625 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447509765625 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.448486328125 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.45068359375 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.45166015625 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 
0.451904296875 0.451904296875 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.45361328125 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.456787109375 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 
0.45849609375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.458740234375 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.459716796875 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.002399444580078125 0.0024242401123046875 0.0024509429931640625 0.002475738525390625 0.0025005340576171875 0.00252532958984375 0.0025501251220703125 0.002574920654296875 0.0025997161865234375 0.00262451171875 0.0026493072509765625 0.002674102783203125 0.0027008056640625 0.0027256011962890625 0.002750396728515625 0.0027751922607421875 0.00279998779296875 0.0028247833251953125 0.002849578857421875 0.0028743743896484375 0.002899169921875 0.002925872802734375 0.0029506683349609375 0.0029754638671875 0.0030002593994140625 0.003025054931640625 0.0030498504638671875 0.00307464599609375 0.0030994415283203125 0.003124237060546875 0.00315093994140625 0.0031757354736328125 0.003200531005859375 0.0032253265380859375 0.0032501220703125 0.0032749176025390625 0.003299713134765625 0.0033245086669921875 0.00334930419921875 0.0033740997314453125 0.0034008026123046875 0.00342559814453125 0.0034503936767578125 0.003475189208984375 0.0034999847412109375 0.0035247802734375 0.0035495758056640625 0.003574371337890625 0.0035991668701171875 0.0036258697509765625 0.003650665283203125 0.0036754608154296875 0.00370025634765625 0.0037250518798828125 0.003749847412109375 0.0037746429443359375 0.0037994384765625 0.0038242340087890625 0.0038509368896484375 0.003875732421875 0.0039005279541015625 0.003925323486328125 0.003948211669921875 0.00397491455078125 0.004001617431640625 0.004024505615234375 0.00405120849609375 0.0040740966796875 0.004100799560546875 0.004123687744140625 0.004150390625 0.00417327880859375 0.004199981689453125 0.0042266845703125 0.00424957275390625 0.004276275634765625 0.004299163818359375 0.00432586669921875 0.0043487548828125 0.004375457763671875 0.004398345947265625 0.004425048828125 0.004451751708984375 0.004474639892578125 0.0045013427734375 0.00452423095703125 0.004550933837890625 0.004573822021484375 0.00460052490234375 0.0046234130859375 0.004650115966796875 0.00467681884765625 0.00469970703125 0.004726409912109375 0.004749298095703125 0.0047760009765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 
0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465087890625 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.466064453125 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468017578125 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 
0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.47119140625 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.47314453125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474365234375 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 
0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476318359375 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.477294921875 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.4794921875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.48046875 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.482421875 0.48291015625 0.48291015625 0.48291015625 0.48291015625 
0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.483642578125 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.485595703125 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.487548828125 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.48876953125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 
0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.49072265625 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.49169921875 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.491943359375 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492431640625 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.492919921875 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.4931640625 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.49365234375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.493896484375 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494384765625 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.494873046875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 0.4951171875 
0.4951171875 0.4951171875 0.4951171875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49560546875 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.49609375 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496337890625 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.496826171875 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.497314453125 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.49755859375 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498046875 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498291015625 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.498779296875 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.499267578125 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.49951171875 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.50048828125 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.5009765625 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.50146484375 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 
0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.501953125 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.50244140625 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.5029296875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50341796875 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50390625 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.50439453125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.5048828125 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.50537109375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.505859375 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.50634765625 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.5068359375 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.50732421875 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.5078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.50830078125 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 
0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.5087890625 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.50927734375 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.509765625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.51025390625 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.5107421875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51123046875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51171875 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.51220703125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.5126953125 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.51318359375 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.513671875 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.51416015625 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.5146484375 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.51513671875 0.515625 0.515625 0.515625 0.515625 0.515625 0.515625 0.515625 0.515625 0.515625 
0.515625 0.515625 0.515625 0.515625 0.515625 0.515625 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.51611328125 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.5166015625 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.51708984375 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.517578125 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.51806640625 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.5185546875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51904296875 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.51953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.52001953125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.5205078125 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.52099609375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.521484375 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.52197265625 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 
0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.5224609375 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.52294921875 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.5234375 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.52392578125 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.5244140625 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.52490234375 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.525390625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.52587890625 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.5263671875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52685546875 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52734375 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.52783203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.5283203125 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.52880859375 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 
0.529296875 0.529296875 0.529296875 0.529296875 0.529296875 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.52978515625 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.5302734375 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53076171875 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.53173828125 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.5322265625 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.53271484375 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.533203125 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.53369140625 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.5341796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53466796875 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53515625 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.53564453125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.5361328125 
0.5361328125 0.5361328125 0.5361328125 0.5361328125 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.53662109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.537109375 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.53759765625 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.5380859375 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.53857421875 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.5390625 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.53955078125 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.5400390625 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.54052734375 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.541015625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.54150390625 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.5419921875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54248046875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 
0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54296875 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.54345703125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.5439453125 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.54443359375 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.544921875 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.54541015625 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.5458984375 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.54638671875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.546875 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.54736328125 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.5478515625 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.54833984375 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.548828125 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.54931640625 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 
0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.5498046875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55029296875 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55078125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.55126953125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.5517578125 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.55224609375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.552734375 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.55322265625 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.5537109375 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.55419921875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.5546875 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.55517578125 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.5556640625 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.55615234375 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 
0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.556640625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.55712890625 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.5576171875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55810546875 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55859375 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.55908203125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.5595703125 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.56005859375 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.560546875 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.56103515625 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.5615234375 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.56201171875 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.5625 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.56298828125 0.5634765625 
0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.5634765625 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.56396484375 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.564453125 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.56494140625 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.5654296875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56591796875 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56640625 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.56689453125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.5673828125 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.56787109375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.568359375 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.56884765625 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.5693359375 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 0.56982421875 
0.56982421875 0.56982421875 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.5703125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.57080078125 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.5712890625 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.57177734375 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.572265625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.57275390625 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.5732421875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57373046875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57421875 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.57470703125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.5751953125 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.57568359375 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.576171875 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 0.57666015625 
0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.5771484375 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.57763671875 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.578125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.57861328125 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.5791015625 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.57958984375 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.580078125 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.58056640625 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.5810546875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58154296875 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58203125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.58251953125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.5830078125 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 0.58349609375 
0.58349609375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.583984375 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.58447265625 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.5849609375 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.58544921875 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.5859375 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.58642578125 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.5869140625 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.58740234375 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.587890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.58837890625 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.5888671875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58935546875 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.58984375 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 0.59033203125 
0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.5908203125 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.59130859375 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.591796875 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.59228515625 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.5927734375 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59326171875 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59375 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.59423828125 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.5947265625 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.59521484375 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.595703125 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.59619140625 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.5966796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59716796875 0.59765625 
0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59765625 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.59814453125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.5986328125 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.59912109375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.599609375 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.60009765625 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.6005859375 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.60107421875 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.6015625 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.60205078125 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.6025390625 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.60302734375 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.603515625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 0.60400390625 
0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.6044921875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60498046875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60546875 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.60595703125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.6064453125 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.60693359375 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.607421875 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.60791015625 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.6083984375 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.60888671875 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.609375 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.60986328125 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.6103515625 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.61083984375 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 
0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.611328125 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.61181640625 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.6123046875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61279296875 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61328125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.61376953125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125 0.6142578125
\ No newline at end of file
diff --git a/runtime/test/test_files/flash_attn_kvcache_outputs_vcache.data b/runtime/test/test_files/flash_attn_kvcache_outputs_vcache.data
new file mode 100644
index 000000000..2c57585ce
--- /dev/null
+++ b/runtime/test/test_files/flash_attn_kvcache_outputs_vcache.data
@@ -0,0 +1 @@
+0.0 2.002716064453125e-05 3.999471664428711e-05 6.002187728881836e-05 7.998943328857422e-05 0.00010001659393310547 0.00011998414993286133 0.0001399517059326172 0.00015997886657714844 0.0001800060272216797 0.00020003318786621094 0.0002199411392211914 0.00023996829986572266 0.0002601146697998047 0.0002799034118652344 0.0002999305725097656 0.0003199577331542969 0.0003399848937988281 0.0003600120544433594 0.0003800392150878906 0.0004000663757324219 0.0004200935363769531 0.0004398822784423828 0.00045990943908691406 0.0004799365997314453 0.0005002021789550781 0.0005202293395996094 0.0005397796630859375 0.0005598068237304688 0.000579833984375 0.0005998611450195312 0.0006198883056640625 0.0006399154663085938 0.000659942626953125 0.0006799697875976562 0.0006999969482421875 0.0007200241088867188 0.00074005126953125 0.0007600784301757812 0.0007801055908203125 0.0008001327514648438 0.000820159912109375 0.0008401870727539062 0.0008602142333984375 0.0008797645568847656 0.0008997917175292969 0.0009198188781738281 0.0009398460388183594 0.0009598731994628906 0.000980377197265625 0.0010004043579101562 0.0010204315185546875 0.0010404586791992188 0.0010595321655273438 0.001079559326171875 0.0010995864868164062 0.0011196136474609375 0.0011396408081054688 0.00115966796875 0.0011796951293945312 0.0011997222900390625 0.0012197494506835938 0.001239776611328125 0.0012598037719726562 0.0012798309326171875 0.0012998580932617188 0.00131988525390625 0.0013399124145507812 0.0013599395751953125 0.0013799667358398438 0.001399993896484375
0.0014200210571289062 0.0014400482177734375 0.0014600753784179688 0.0014801025390625 0.0015001296997070312 0.0015201568603515625 0.0015401840209960938 0.001560211181640625 0.0015802383422851562 0.0016002655029296875 0.0016202926635742188 0.00164031982421875 0.0016603469848632812 0.0016803741455078125 0.0017004013061523438 0.001720428466796875 0.0017404556274414062 0.0017595291137695312 0.0017795562744140625 0.0017995834350585938 0.001819610595703125 0.0018396377563476562 0.0018596649169921875 0.0018796920776367188 0.00189971923828125 0.0019197463989257812 0.0019397735595703125 0.00196075439453125 0.001979827880859375 0.0020008087158203125 0.0020198822021484375 0.002040863037109375 0.0020599365234375 0.0020809173583984375 0.0020999908447265625 0.0021190643310546875 0.002140045166015625 0.00215911865234375 0.0021800994873046875 0.0021991729736328125 0.00222015380859375 0.002239227294921875 0.0022602081298828125 0.0022792816162109375 0.002300262451171875 0.0023193359375 0.0023403167724609375 0.0023593902587890625 0.00238037109375 0.002399444580078125 0.0024204254150390625 0.0024394989013671875 0.002460479736328125 0.00247955322265625 0.0025005340576171875 0.0025196075439453125 0.00254058837890625 0.002559661865234375 0.0025806427001953125 0.0025997161865234375 0.002620697021484375 0.0026397705078125 0.0026607513427734375 0.0026798248291015625 0.0027008056640625 0.002719879150390625 0.0027408599853515625 0.0027599334716796875 0.002780914306640625 0.00279998779296875 0.002819061279296875 0.0028400421142578125 0.0028591156005859375 0.002880096435546875 0.002899169921875 0.0029201507568359375 0.0029392242431640625 0.002960205078125 0.002979278564453125 0.0030002593994140625 0.0030193328857421875 0.003040313720703125 0.00305938720703125 0.0030803680419921875 0.0030994415283203125 0.00312042236328125 0.003139495849609375 0.0031604766845703125 0.0031795501708984375 0.003200531005859375 0.0032196044921875 0.0032405853271484375 0.0032596588134765625 0.0032806396484375 0.003299713134765625 0.0033206939697265625 0.0033397674560546875 0.003360748291015625 0.00337982177734375 0.0034008026123046875 0.0034198760986328125 0.00344085693359375 0.003459930419921875 0.0034809112548828125 0.0034999847412109375 0.0035190582275390625 0.0035400390625 0.003559112548828125 0.0035800933837890625 0.0035991668701171875 0.003620147705078125 0.00363922119140625 0.0036602020263671875 0.0036792755126953125 0.00370025634765625 0.003719329833984375 0.0037403106689453125 0.0037593841552734375 0.003780364990234375 0.0037994384765625 0.0038204193115234375 0.0038394927978515625 0.0038604736328125 0.003879547119140625 0.0039005279541015625 0.0039215087890625 0.003940582275390625 0.00395965576171875 0.003978729248046875 0.004001617431640625 0.00402069091796875 0.004039764404296875 0.004058837890625 0.00408172607421875 0.004100799560546875 0.004119873046875 0.004138946533203125 0.004161834716796875 0.004180908203125 0.004199981689453125 0.00421905517578125 0.004238128662109375 0.004261016845703125 0.00428009033203125 0.004299163818359375 0.0043182373046875 0.00434112548828125 0.004360198974609375 0.0043792724609375 0.004398345947265625 0.004421234130859375 0.0044403076171875 0.004459381103515625 0.00447845458984375 0.0045013427734375 0.004520416259765625 0.00453948974609375 0.004558563232421875 0.004581451416015625 0.00460052490234375 0.004619598388671875 0.004638671875 0.00466156005859375 0.004680633544921875 0.00469970703125 0.004718780517578125 0.004741668701171875 0.0047607421875 0.004779815673828125 0.00479888916015625 
0.00482177734375 0.004840850830078125 0.00485992431640625 0.004878997802734375 0.004901885986328125 0.00492095947265625 0.004940032958984375 0.0049591064453125 0.004978179931640625 0.005001068115234375 0.0050201416015625 0.005039215087890625 0.00505828857421875 0.0050811767578125 0.005100250244140625 0.00511932373046875 0.005138397216796875 0.005161285400390625 0.00518035888671875 0.005199432373046875 0.005218505859375 0.00524139404296875 0.005260467529296875 0.005279541015625 0.005298614501953125 0.005321502685546875 0.005340576171875 0.005359649658203125 0.00537872314453125 0.005401611328125 0.005420684814453125 0.00543975830078125 0.005458831787109375 0.005481719970703125 0.00550079345703125 0.005519866943359375 0.0055389404296875 0.00556182861328125 0.005580902099609375 0.0055999755859375 0.005619049072265625 0.00563812255859375 0.0056610107421875 0.005680084228515625 0.00569915771484375 0.005718231201171875 0.005741119384765625 0.00576019287109375 0.005779266357421875 0.00579833984375 0.00582122802734375 0.005840301513671875 0.005859375 0.005878448486328125 0.005901336669921875 0.00592041015625 0.005939483642578125 0.00595855712890625 0.0059814453125 0.006000518798828125 0.00601959228515625 0.006038665771484375 0.006061553955078125 0.00608062744140625 0.006099700927734375 0.0061187744140625 0.00614166259765625 0.006160736083984375 0.0061798095703125 0.006198883056640625 0.006221771240234375 0.0062408447265625 0.006259918212890625 0.00627899169921875 0.0063018798828125 0.006320953369140625 0.00634002685546875 0.006359100341796875 0.006378173828125 0.00640106201171875 0.006420135498046875 0.006439208984375 0.006458282470703125 0.006481170654296875 0.006500244140625 0.006519317626953125 0.00653839111328125 0.006561279296875 0.006580352783203125 0.00659942626953125 0.006618499755859375 0.006641387939453125 0.00666046142578125 0.006679534912109375 0.0066986083984375 0.00672149658203125 0.006740570068359375 0.0067596435546875 0.006778717041015625 0.006801605224609375 0.0068206787109375 0.006839752197265625 0.00685882568359375 0.0068817138671875 0.006900787353515625 0.00691986083984375 0.006938934326171875 0.006961822509765625 0.00698089599609375 0.006999969482421875 0.00701904296875 0.007038116455078125 0.007061004638671875 0.007080078125 0.007099151611328125 0.00711822509765625 0.00714111328125 0.007160186767578125 0.00717926025390625 0.007198333740234375 0.007221221923828125 0.00724029541015625 0.007259368896484375 0.0072784423828125 0.00730133056640625 0.007320404052734375 0.0073394775390625 0.007358551025390625 0.007381439208984375 0.0074005126953125 0.007419586181640625 0.00743865966796875 0.0074615478515625 0.007480621337890625 0.00749969482421875 0.007518768310546875 0.007541656494140625 0.00756072998046875 0.007579803466796875 0.007598876953125 0.00762176513671875 0.007640838623046875 0.007659912109375 0.007678985595703125 0.007701873779296875 0.007720947265625 0.007740020751953125 0.00775909423828125 0.007778167724609375 0.007801055908203125 0.00782012939453125 0.007843017578125 0.0078582763671875 0.00788116455078125 0.00789642333984375 0.0079193115234375 0.00794219970703125 0.00795745849609375 0.0079803466796875 0.00800323486328125 0.00801849365234375 0.0080413818359375 0.008056640625 0.00807952880859375 0.0081024169921875 0.00811767578125 0.00814056396484375 0.0081634521484375 0.0081787109375 0.00820159912109375 0.00821685791015625 0.00823974609375 0.00826263427734375 0.00827789306640625 0.00830078125 0.00832366943359375 0.00833892822265625 0.00836181640625 0.0083770751953125 
0.00839996337890625 0.0084228515625 0.0084381103515625 0.00846099853515625 0.00847625732421875 0.0084991455078125 0.00852203369140625 0.00853729248046875 0.0085601806640625 0.00858306884765625 0.00859832763671875 0.0086212158203125 0.008636474609375 0.00865936279296875 0.0086822509765625 0.008697509765625 0.00872039794921875 0.0087432861328125 0.008758544921875 0.00878143310546875 0.00879669189453125 0.008819580078125 0.00884246826171875 0.00885772705078125 0.008880615234375 0.00890350341796875 0.00891876220703125 0.008941650390625 0.0089569091796875 0.00897979736328125 0.009002685546875 0.0090179443359375 0.00904083251953125 0.009063720703125 0.0090789794921875 0.00910186767578125 0.00911712646484375 0.0091400146484375 0.00916290283203125 0.00917816162109375 0.0092010498046875 0.00921630859375 0.00923919677734375 0.0092620849609375 0.00927734375 0.00930023193359375 0.0093231201171875 0.00933837890625 0.00936126708984375 0.00937652587890625 0.0093994140625 0.00942230224609375 0.00943756103515625 0.00946044921875 0.00948333740234375 0.00949859619140625 0.009521484375 0.0095367431640625 0.00955963134765625 0.00958251953125 0.0095977783203125 0.00962066650390625 0.0096435546875 0.0096588134765625 0.00968170166015625 0.00969696044921875 0.0097198486328125 0.00974273681640625 0.00975799560546875 0.0097808837890625 0.00980377197265625 0.00981903076171875 0.0098419189453125 0.009857177734375 0.00988006591796875 0.0099029541015625 0.009918212890625 0.00994110107421875 0.00995635986328125 0.009979248046875 0.01000213623046875 0.01001739501953125 0.010040283203125 0.01006317138671875 0.01007843017578125 0.010101318359375 0.0101165771484375 0.01013946533203125 0.010162353515625 0.0101776123046875 0.01020050048828125 0.010223388671875 0.0102386474609375 0.01026153564453125 0.01027679443359375 0.0102996826171875 0.01032257080078125 0.01033782958984375 0.0103607177734375 0.01038360595703125 0.01039886474609375 0.0104217529296875 0.01043701171875 0.01045989990234375 0.0104827880859375 0.010498046875 0.01052093505859375 0.01053619384765625 0.01055908203125 0.01058197021484375 0.01059722900390625 0.0106201171875 0.01064300537109375 0.01065826416015625 0.01068115234375 0.0106964111328125 0.01071929931640625 0.0107421875 0.0107574462890625 0.01078033447265625 0.01080322265625 0.0108184814453125 0.01084136962890625 0.01085662841796875 0.0108795166015625 0.01090240478515625 0.01091766357421875 0.0109405517578125 0.01096343994140625 0.01097869873046875 0.0110015869140625 0.011016845703125 0.01103973388671875 0.0110626220703125 0.011077880859375 0.01110076904296875 0.0111236572265625 0.011138916015625 0.01116180419921875 0.01117706298828125 0.011199951171875 0.01122283935546875 0.01123809814453125 0.011260986328125 0.0112762451171875 0.01129913330078125 0.011322021484375 0.0113372802734375 0.01136016845703125 0.011383056640625 0.0113983154296875 0.01142120361328125 0.01143646240234375 0.0114593505859375 0.01148223876953125 0.01149749755859375 0.0115203857421875 0.01154327392578125 0.01155853271484375 0.0115814208984375 0.0115966796875 0.01161956787109375 0.0116424560546875 0.01165771484375 0.01168060302734375 0.0117034912109375 0.01171875 0.01174163818359375 0.01175689697265625 0.01177978515625 0.01180267333984375 0.01181793212890625 0.0118408203125 0.01186370849609375 0.01187896728515625 0.01190185546875 0.0119171142578125 0.01194000244140625 0.011962890625 0.0119781494140625 0.01200103759765625 0.01201629638671875 0.0120391845703125 0.01206207275390625 0.01207733154296875 0.0121002197265625 0.01212310791015625 
[test input tensor data elided: a single long run of monotonically increasing float16 values (0.0121... through 0.1006..., with adjacent duplicates where consecutive values round to the same half-precision representation); omitted here for readability]
0.10064697265625 0.10064697265625 0.1007080078125 0.1007080078125 0.1007080078125 0.100830078125 0.100830078125 0.100830078125 0.100830078125 0.100830078125 0.10089111328125 0.10089111328125 0.10089111328125 0.1009521484375 0.1009521484375 0.1009521484375 0.1009521484375 0.1009521484375 0.10101318359375 0.10101318359375 0.10101318359375 0.10113525390625 0.10113525390625 0.10113525390625 0.10113525390625 0.10113525390625 0.1011962890625 0.1011962890625 0.1011962890625 0.10125732421875 0.10125732421875 0.10125732421875 0.10125732421875 0.10125732421875 0.10137939453125 0.10137939453125 0.10137939453125 0.1014404296875 0.1014404296875 0.1014404296875 0.1014404296875 0.1014404296875 0.10150146484375 0.10150146484375 0.10150146484375 0.10162353515625 0.10162353515625 0.10162353515625 0.10162353515625 0.10162353515625 0.1016845703125 0.1016845703125 0.1016845703125 0.10174560546875 0.10174560546875 0.10174560546875 0.10174560546875 0.10174560546875 0.10186767578125 0.10186767578125 0.10186767578125 0.1019287109375 0.1019287109375 0.1019287109375 0.1019287109375 0.1019287109375 0.10198974609375 0.10198974609375 0.10198974609375 0.10205078125 0.10205078125 0.10205078125 0.10205078125 0.10205078125 0.1021728515625 0.1021728515625 0.1021728515625 0.10223388671875 0.10223388671875 0.10223388671875 0.10223388671875 0.10223388671875 0.102294921875 0.102294921875 0.102294921875 0.1024169921875 0.1024169921875 0.1024169921875 0.1024169921875 0.1024169921875 0.10247802734375 0.10247802734375 0.10247802734375 0.1025390625 0.1025390625 0.1025390625 0.1025390625 0.1025390625 0.1026611328125 0.1026611328125 0.1026611328125 0.10272216796875 0.10272216796875 0.10272216796875 0.10272216796875 0.10272216796875 0.102783203125 0.102783203125 0.102783203125 0.1029052734375 0.1029052734375 0.1029052734375 0.1029052734375 0.1029052734375 0.10296630859375 0.10296630859375 0.10296630859375 0.10302734375 0.10302734375 0.10302734375 0.10302734375 0.10302734375 0.1031494140625 0.1031494140625 0.1031494140625 0.10321044921875 0.10321044921875 0.10321044921875 0.10321044921875 0.10321044921875 0.103271484375 0.103271484375 0.103271484375 0.10333251953125 0.10333251953125 0.10333251953125 0.10333251953125 0.10333251953125 0.10345458984375 0.10345458984375 0.10345458984375 0.103515625 0.103515625 0.103515625 0.103515625 0.103515625 0.10357666015625 0.10357666015625 0.10357666015625 0.10369873046875 0.10369873046875 0.10369873046875 0.10369873046875 0.10369873046875 0.103759765625 0.103759765625 0.103759765625 0.10382080078125 0.10382080078125 0.10382080078125 0.10382080078125 0.10382080078125 0.10394287109375 0.10394287109375 0.10394287109375 0.10400390625 0.10400390625 0.10400390625 0.10400390625 0.10400390625 0.10406494140625 0.10406494140625 0.10406494140625 0.10418701171875 0.10418701171875 0.10418701171875 0.10418701171875 0.10418701171875 0.104248046875 0.104248046875 0.104248046875 0.10430908203125 0.10430908203125 0.10430908203125 0.10430908203125 0.10430908203125 0.1043701171875 0.1043701171875 0.1043701171875 0.1044921875 0.1044921875 0.1044921875 0.1044921875 0.1044921875 0.10455322265625 0.10455322265625 0.10455322265625 0.1046142578125 0.1046142578125 0.1046142578125 0.1046142578125 0.1046142578125 0.104736328125 0.104736328125 0.104736328125 0.10479736328125 0.10479736328125 0.10479736328125 0.10479736328125 0.10479736328125 0.1048583984375 0.1048583984375 0.1048583984375 0.10498046875 0.10498046875 0.10498046875 0.10498046875 0.10498046875 0.10504150390625 0.10504150390625 0.10504150390625 0.1051025390625 
0.1051025390625 0.1051025390625 0.1051025390625 0.1051025390625 0.105224609375 0.105224609375 0.105224609375 0.10528564453125 0.10528564453125 0.10528564453125 0.10528564453125 0.10528564453125 0.1053466796875 0.1053466796875 0.1053466796875 0.10546875 0.10546875 0.10546875 0.10546875 0.10546875 0.10552978515625 0.10552978515625 0.10552978515625 0.1055908203125 0.1055908203125 0.1055908203125 0.1055908203125 0.1055908203125 0.10565185546875 0.10565185546875 0.10565185546875 0.10577392578125 0.10577392578125 0.10577392578125 0.10577392578125 0.10577392578125 0.1058349609375 0.1058349609375 0.1058349609375 0.10589599609375 0.10589599609375 0.10589599609375 0.10589599609375 0.10589599609375 0.10601806640625 0.10601806640625 0.10601806640625 0.1060791015625 0.1060791015625 0.1060791015625 0.1060791015625 0.1060791015625 0.10614013671875 0.10614013671875 0.10614013671875 0.10626220703125 0.10626220703125 0.10626220703125 0.10626220703125 0.10626220703125 0.1063232421875 0.1063232421875 0.1063232421875 0.10638427734375 0.10638427734375 0.10638427734375 0.10638427734375 0.10638427734375 0.10650634765625 0.10650634765625 0.10650634765625 0.1065673828125 0.1065673828125 0.1065673828125 0.1065673828125 0.1065673828125 0.10662841796875 0.10662841796875 0.10662841796875 0.10675048828125 0.10675048828125 0.10675048828125 0.10675048828125 0.10675048828125 0.1068115234375 0.1068115234375 0.1068115234375 0.10687255859375 0.10687255859375 0.10687255859375 0.10687255859375 0.10687255859375 0.10693359375 0.10693359375 0.10693359375 0.1070556640625 0.1070556640625 0.1070556640625 0.1070556640625 0.1070556640625 0.10711669921875 0.10711669921875 0.10711669921875 0.107177734375 0.107177734375 0.107177734375 0.107177734375 0.107177734375 0.1072998046875 0.1072998046875 0.1072998046875 0.10736083984375 0.10736083984375 0.10736083984375 0.10736083984375 0.10736083984375 0.107421875 0.107421875 0.107421875 0.1075439453125 0.1075439453125 0.1075439453125 0.1075439453125 0.1075439453125 0.10760498046875 0.10760498046875 0.10760498046875 0.107666015625 0.107666015625 0.107666015625 0.107666015625 0.107666015625 0.1077880859375 0.1077880859375 0.1077880859375 0.10784912109375 0.10784912109375 0.10784912109375 0.10784912109375 0.10784912109375 0.10791015625 0.10791015625 0.10791015625 0.10797119140625 0.10797119140625 0.10797119140625 0.10797119140625 0.10797119140625 0.10809326171875 0.10809326171875 0.10809326171875 0.108154296875 0.108154296875 0.108154296875 0.108154296875 0.108154296875 0.10821533203125 0.10821533203125 0.10821533203125 0.10833740234375 0.10833740234375 0.10833740234375 0.10833740234375 0.10833740234375 0.1083984375 0.1083984375 0.1083984375 0.10845947265625 0.10845947265625 0.10845947265625 0.10845947265625 0.10845947265625 0.10858154296875 0.10858154296875 0.10858154296875 0.108642578125 0.108642578125 0.108642578125 0.108642578125 0.108642578125 0.10870361328125 0.10870361328125 0.10870361328125 0.10882568359375 0.10882568359375 0.10882568359375 0.10882568359375 0.10882568359375 0.10888671875 0.10888671875 0.10888671875 0.10894775390625 0.10894775390625 0.10894775390625 0.10894775390625 0.10894775390625 0.10906982421875 0.10906982421875 0.10906982421875 0.109130859375 0.109130859375 0.109130859375 0.109130859375 0.109130859375 0.10919189453125 0.10919189453125 0.10919189453125 0.1092529296875 0.1092529296875 0.1092529296875 0.1092529296875 0.1092529296875 0.109375 0.109375 0.109375 0.10943603515625 0.10943603515625 0.10943603515625 0.10943603515625 0.10943603515625 0.1094970703125 
0.1094970703125 0.1094970703125 0.109619140625 0.109619140625 0.109619140625 0.109619140625 0.109619140625 0.10968017578125 0.10968017578125 0.10968017578125 0.1097412109375 0.1097412109375 0.1097412109375 0.1097412109375 0.1097412109375 0.10986328125 0.10986328125 0.10986328125 0.10992431640625 0.10992431640625 0.10992431640625 0.10992431640625 0.10992431640625 0.1099853515625 0.1099853515625 0.1099853515625 0.110107421875 0.110107421875 0.110107421875 0.110107421875 0.110107421875 0.11016845703125 0.11016845703125 0.11016845703125 0.1102294921875 0.1102294921875 0.1102294921875 0.1102294921875 0.1102294921875 0.11029052734375 0.11029052734375 0.11029052734375 0.11041259765625 0.11041259765625 0.11041259765625 0.11041259765625 0.11041259765625 0.1104736328125 0.1104736328125 0.1104736328125 0.11053466796875 0.11053466796875 0.11053466796875 0.11053466796875 0.11053466796875 0.11065673828125 0.11065673828125 0.11065673828125 0.1107177734375 0.1107177734375 0.1107177734375 0.1107177734375 0.1107177734375 0.11077880859375 0.11077880859375 0.11077880859375 0.11090087890625 0.11090087890625 0.11090087890625 0.11090087890625 0.11090087890625 0.1109619140625 0.1109619140625 0.1109619140625 0.11102294921875 0.11102294921875 0.11102294921875 0.11102294921875 0.11102294921875 0.11114501953125 0.11114501953125 0.11114501953125 0.1112060546875 0.1112060546875 0.1112060546875 0.1112060546875 0.1112060546875 0.11126708984375 0.11126708984375 0.11126708984375 0.11138916015625 0.11138916015625 0.11138916015625 0.11138916015625 0.11138916015625 0.1114501953125 0.1114501953125 0.1114501953125 0.11151123046875 0.11151123046875 0.11151123046875 0.11151123046875 0.11151123046875 0.111572265625 0.111572265625 0.111572265625 0.1116943359375 0.1116943359375 0.1116943359375 0.1116943359375 0.1116943359375 0.11175537109375 0.11175537109375 0.11175537109375 0.11181640625 0.11181640625 0.11181640625 0.11181640625 0.11181640625 0.1119384765625 0.1119384765625 0.1119384765625 0.11199951171875 0.11199951171875 0.11199951171875 0.11199951171875 0.11199951171875 0.112060546875 0.112060546875 0.112060546875 0.1121826171875 0.1121826171875 0.1121826171875 0.1121826171875 0.1121826171875 0.11224365234375 0.11224365234375 0.11224365234375 0.1123046875 0.1123046875 0.1123046875 0.1123046875 0.1123046875 0.1124267578125 0.1124267578125 0.1124267578125 0.11248779296875 0.11248779296875 0.11248779296875 0.11248779296875 0.11248779296875 0.112548828125 0.112548828125 0.112548828125 0.11260986328125 0.11260986328125 0.11260986328125 0.11260986328125 0.11260986328125 0.11273193359375 0.11273193359375 0.11273193359375 0.11279296875 0.11279296875 0.11279296875 0.11279296875 0.11279296875 0.11285400390625 0.11285400390625 0.11285400390625 0.11297607421875 0.11297607421875 0.11297607421875 0.11297607421875 0.11297607421875 0.113037109375 0.113037109375 0.113037109375 0.11309814453125 0.11309814453125 0.11309814453125 0.11309814453125 0.11309814453125 0.11322021484375 0.11322021484375 0.11322021484375 0.11328125 0.11328125 0.11328125 0.11328125 0.11328125 0.11334228515625 0.11334228515625 0.11334228515625 0.11346435546875 0.11346435546875 0.11346435546875 0.11346435546875 0.11346435546875 0.113525390625 0.113525390625 0.113525390625 0.11358642578125 0.11358642578125 0.11358642578125 0.11358642578125 0.11358642578125 0.11370849609375 0.11370849609375 0.11370849609375 0.11376953125 0.11376953125 0.11376953125 0.11376953125 0.11376953125 0.11383056640625 0.11383056640625 0.11383056640625 0.1138916015625 0.1138916015625 0.1138916015625 
0.1138916015625 0.1138916015625 0.114013671875 0.114013671875 0.114013671875 0.11407470703125 0.11407470703125 0.11407470703125 0.11407470703125 0.11407470703125 0.1141357421875 0.1141357421875 0.1141357421875 0.1142578125 0.1142578125 0.1142578125 0.1142578125 0.1142578125 0.11431884765625 0.11431884765625 0.11431884765625 0.1143798828125 0.1143798828125 0.1143798828125 0.1143798828125 0.1143798828125 0.114501953125 0.114501953125 0.114501953125 0.11456298828125 0.11456298828125 0.11456298828125 0.11456298828125 0.11456298828125 0.1146240234375 0.1146240234375 0.1146240234375 0.11474609375 0.11474609375 0.11474609375 0.11474609375 0.11474609375 0.11480712890625 0.11480712890625 0.11480712890625 0.1148681640625 0.1148681640625 0.1148681640625 0.1148681640625 0.1148681640625 0.114990234375 0.114990234375 0.114990234375 0.11505126953125 0.11505126953125 0.11505126953125 0.11505126953125 0.11505126953125 0.1151123046875 0.1151123046875 0.1151123046875 0.11517333984375 0.11517333984375 0.11517333984375 0.11517333984375 0.11517333984375 0.11529541015625 0.11529541015625 0.11529541015625 0.1153564453125 0.1153564453125 0.1153564453125 0.1153564453125 0.1153564453125 0.11541748046875 0.11541748046875 0.11541748046875 0.11553955078125 0.11553955078125 0.11553955078125 0.11553955078125 0.11553955078125 0.1156005859375 0.1156005859375 0.1156005859375 0.11566162109375 0.11566162109375 0.11566162109375 0.11566162109375 0.11566162109375 0.11578369140625 0.11578369140625 0.11578369140625 0.1158447265625 0.1158447265625 0.1158447265625 0.1158447265625 0.1158447265625 0.11590576171875 0.11590576171875 0.11590576171875 0.11602783203125 0.11602783203125 0.11602783203125 0.11602783203125 0.11602783203125 0.1160888671875 0.1160888671875 0.1160888671875 0.11614990234375 0.11614990234375 0.11614990234375 0.11614990234375 0.11614990234375 0.1162109375 0.1162109375 0.1162109375 0.1163330078125 0.1163330078125 0.1163330078125 0.1163330078125 0.1163330078125 0.11639404296875 0.11639404296875 0.11639404296875 0.116455078125 0.116455078125 0.116455078125 0.116455078125 0.116455078125 0.1165771484375 0.1165771484375 0.1165771484375 0.11663818359375 0.11663818359375 0.11663818359375 0.11663818359375 0.11663818359375 0.11669921875 0.11669921875 0.11669921875 0.1168212890625 0.1168212890625 0.1168212890625 0.1168212890625 0.1168212890625 0.11688232421875 0.11688232421875 0.11688232421875 0.116943359375 0.116943359375 0.116943359375 0.116943359375 0.116943359375 0.1170654296875 0.1170654296875 0.1170654296875 0.11712646484375 0.11712646484375 0.11712646484375 0.11712646484375 0.11712646484375 0.1171875 0.1171875 0.1171875 0.1173095703125 0.1173095703125 0.1173095703125 0.1173095703125 0.1173095703125 0.11737060546875 0.11737060546875 0.11737060546875 0.117431640625 0.117431640625 0.117431640625 0.117431640625 0.117431640625 0.11749267578125 0.11749267578125 0.11749267578125 0.11761474609375 0.11761474609375 0.11761474609375 0.11761474609375 0.11761474609375 0.11767578125 0.11767578125 0.11767578125 0.11773681640625 0.11773681640625 0.11773681640625 0.11773681640625 0.11773681640625 0.11785888671875 0.11785888671875 0.11785888671875 0.117919921875 0.117919921875 0.117919921875 0.117919921875 0.117919921875 0.11798095703125 0.11798095703125 0.11798095703125 0.11810302734375 0.11810302734375 0.11810302734375 0.11810302734375 0.11810302734375 0.1181640625 0.1181640625 0.1181640625 0.11822509765625 0.11822509765625 0.11822509765625 0.11822509765625 0.11822509765625 0.11834716796875 0.11834716796875 0.11834716796875 
0.118408203125 0.118408203125 0.118408203125 0.118408203125 0.118408203125 0.11846923828125 0.11846923828125 0.11846923828125 0.1185302734375 0.1185302734375 0.1185302734375 0.1185302734375 0.1185302734375 0.11865234375 0.11865234375 0.11865234375 0.11871337890625 0.11871337890625 0.11871337890625 0.11871337890625 0.11871337890625 0.1187744140625 0.1187744140625 0.1187744140625 0.118896484375 0.118896484375 0.118896484375 0.118896484375 0.118896484375 0.11895751953125 0.11895751953125 0.11895751953125 0.1190185546875 0.1190185546875 0.1190185546875 0.1190185546875 0.1190185546875 0.119140625 0.119140625 0.119140625 0.11920166015625 0.11920166015625 0.11920166015625 0.11920166015625 0.11920166015625 0.1192626953125 0.1192626953125 0.1192626953125 0.119384765625 0.119384765625 0.119384765625 0.119384765625 0.119384765625 0.11944580078125 0.11944580078125 0.11944580078125 0.1195068359375 0.1195068359375 0.1195068359375 0.1195068359375 0.1195068359375 0.11962890625 0.11962890625 0.11962890625 0.11968994140625 0.11968994140625 0.11968994140625 0.11968994140625 0.11968994140625 0.1197509765625 0.1197509765625 0.1197509765625 0.11981201171875 0.11981201171875 0.11981201171875 0.11981201171875 0.11981201171875 0.11993408203125 0.11993408203125 0.11993408203125 0.1199951171875 0.1199951171875 0.1199951171875 0.1199951171875 0.1199951171875 0.12005615234375 0.12005615234375 0.12005615234375 0.12017822265625 0.12017822265625 0.12017822265625 0.12017822265625 0.12017822265625 0.1202392578125 0.1202392578125 0.1202392578125 0.12030029296875 0.12030029296875 0.12030029296875 0.12030029296875 0.12030029296875 0.12042236328125 0.12042236328125 0.12042236328125 0.1204833984375 0.1204833984375 0.1204833984375 0.1204833984375 0.1204833984375 0.12054443359375 0.12054443359375 0.12054443359375 0.12066650390625 0.12066650390625 0.12066650390625 0.12066650390625 0.12066650390625 0.1207275390625 0.1207275390625 0.1207275390625 0.12078857421875 0.12078857421875 0.12078857421875 0.12078857421875 0.12078857421875 0.120849609375 0.120849609375 0.120849609375 0.1209716796875 0.1209716796875 0.1209716796875 0.1209716796875 0.1209716796875 0.12103271484375 0.12103271484375 0.12103271484375 0.12109375 0.12109375 0.12109375 0.12109375 0.12109375 0.1212158203125 0.1212158203125 0.1212158203125 0.12127685546875 0.12127685546875 0.12127685546875 0.12127685546875 0.12127685546875 0.121337890625 0.121337890625 0.121337890625 0.1214599609375 0.1214599609375 0.1214599609375 0.1214599609375 0.1214599609375 0.12152099609375 0.12152099609375 0.12152099609375 0.12158203125 0.12158203125 0.12158203125 0.12158203125 0.12158203125 0.1217041015625 0.1217041015625 0.1217041015625 0.12176513671875 0.12176513671875 0.12176513671875 0.12176513671875 0.12176513671875 0.121826171875 0.121826171875 0.121826171875 0.1219482421875 0.1219482421875 0.1219482421875 0.1219482421875 0.1219482421875 0.12200927734375 0.12200927734375 0.12200927734375 0.1220703125 0.1220703125 0.1220703125 0.1220703125 0.1220703125 0.12213134765625 0.12213134765625 0.12213134765625 0.12225341796875 0.12225341796875 0.12225341796875 0.12225341796875 0.12225341796875 0.122314453125 0.122314453125 0.122314453125 0.12237548828125 0.12237548828125 0.12237548828125 0.12237548828125 0.12237548828125 0.12249755859375 0.12249755859375 0.12249755859375 0.12255859375 0.12255859375 0.12255859375 0.12255859375 0.12255859375 0.12261962890625 0.12261962890625 0.12261962890625 0.12274169921875 0.12274169921875 0.12274169921875 0.12274169921875 0.12274169921875 0.122802734375 
0.122802734375 0.122802734375 0.12286376953125 0.12286376953125 0.0 2.002716064453125e-05 3.999471664428711e-05 6.002187728881836e-05 7.998943328857422e-05 0.00010001659393310547 0.00011998414993286133 0.0001399517059326172 0.00015997886657714844 0.0001800060272216797 0.00020003318786621094 0.0002199411392211914 0.00023996829986572266 0.0002601146697998047 0.0002799034118652344 0.0002999305725097656 0.0003199577331542969 0.0003399848937988281 0.0003600120544433594 0.0003800392150878906 0.0004000663757324219 0.0004200935363769531 0.0004398822784423828 0.00045990943908691406 0.0004799365997314453 0.0005002021789550781 0.0005202293395996094 0.0005397796630859375 0.0005598068237304688 0.000579833984375 0.0005998611450195312 0.0006198883056640625 0.0006399154663085938 0.000659942626953125 0.0006799697875976562 0.0006999969482421875 0.0007200241088867188 0.00074005126953125 0.0007600784301757812 0.0007801055908203125 0.0008001327514648438 0.000820159912109375 0.0008401870727539062 0.0008602142333984375 0.0008797645568847656 0.0008997917175292969 0.0009198188781738281 0.0009398460388183594 0.0009598731994628906 0.000980377197265625 0.0010004043579101562 0.0010204315185546875 0.0010404586791992188 0.0010595321655273438 0.001079559326171875 0.0010995864868164062 0.0011196136474609375 0.0011396408081054688 0.00115966796875 0.0011796951293945312 0.0011997222900390625 0.0012197494506835938 0.001239776611328125 0.0012598037719726562 0.0012798309326171875 0.0012998580932617188 0.00131988525390625 0.0013399124145507812 0.0013599395751953125 0.0013799667358398438 0.001399993896484375 0.0014200210571289062 0.0014400482177734375 0.0014600753784179688 0.0014801025390625 0.0015001296997070312 0.0015201568603515625 0.0015401840209960938 0.001560211181640625 0.0015802383422851562 0.0016002655029296875 0.0016202926635742188 0.00164031982421875 0.0016603469848632812 0.0016803741455078125 0.0017004013061523438 0.001720428466796875 0.0017404556274414062 0.0017595291137695312 0.0017795562744140625 0.0017995834350585938 0.001819610595703125 0.0018396377563476562 0.0018596649169921875 0.0018796920776367188 0.00189971923828125 0.12481689453125 0.12481689453125 0.12481689453125 0.1248779296875 0.1248779296875 0.1248779296875 0.12493896484375 0.12493896484375 0.12493896484375 0.12493896484375 0.12493896484375 0.125 0.125 0.125 0.1251220703125 0.1251220703125 0.1251220703125 0.1251220703125 0.1251220703125 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.125244140625 0.1253662109375 0.1253662109375 0.1253662109375 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.12548828125 0.1256103515625 0.1256103515625 0.1256103515625 0.1256103515625 0.1256103515625 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.125732421875 0.1258544921875 0.1258544921875 0.1258544921875 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1259765625 0.1260986328125 0.1260986328125 0.1260986328125 0.1260986328125 0.1260986328125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.126220703125 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.1263427734375 0.12646484375 0.12646484375 0.12646484375 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 0.1265869140625 
0.126708984375 0.126708984375 0.126708984375 0.126708984375 0.126708984375 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.1268310546875 0.126953125 0.126953125 0.126953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.1270751953125 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.127197265625 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.1273193359375 0.12744140625 0.12744140625 0.12744140625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.1275634765625 0.127685546875 0.127685546875 0.127685546875 0.127685546875 0.127685546875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1278076171875 0.1279296875 0.1279296875 0.1279296875 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.1280517578125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.128173828125 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.1282958984375 0.12841796875 0.12841796875 0.12841796875 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.1285400390625 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.128662109375 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.1287841796875 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.12890625 0.1290283203125 0.1290283203125 0.1290283203125 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.129150390625 0.1292724609375 0.1292724609375 0.1292724609375 0.1292724609375 0.1292724609375 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.12939453125 0.1295166015625 0.1295166015625 0.1295166015625 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.129638671875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1297607421875 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1298828125 0.1300048828125 0.1300048828125 0.1300048828125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.130126953125 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.1302490234375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.13037109375 0.1304931640625 0.1304931640625 0.1304931640625 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.130615234375 0.1307373046875 0.1307373046875 0.1307373046875 0.1307373046875 0.1307373046875 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.130859375 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.1309814453125 0.131103515625 0.131103515625 0.131103515625 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.1312255859375 0.13134765625 0.13134765625 0.13134765625 0.13134765625 
0.13134765625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.1314697265625 0.131591796875 0.131591796875 0.131591796875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1317138671875 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1318359375 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.1319580078125 0.132080078125 0.132080078125 0.132080078125 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.1322021484375 0.13232421875 0.13232421875 0.13232421875 0.13232421875 0.13232421875 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.1324462890625 0.132568359375 0.132568359375 0.132568359375 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1326904296875 0.1328125 0.1328125 0.1328125 0.1328125 0.1328125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.1329345703125 0.133056640625 0.133056640625 0.133056640625 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.1331787109375 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.13330078125 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.1334228515625 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.133544921875 0.1336669921875 0.1336669921875 0.1336669921875 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1337890625 0.1339111328125 0.1339111328125 0.1339111328125 0.1339111328125 0.1339111328125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.134033203125 0.1341552734375 0.1341552734375 0.1341552734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.13427734375 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.1343994140625 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.134521484375 0.1346435546875 0.1346435546875 0.1346435546875 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.134765625 0.1348876953125 0.1348876953125 0.1348876953125 0.1348876953125 0.1348876953125 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.135009765625 0.1351318359375 0.1351318359375 0.1351318359375 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.13525390625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.1353759765625 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.135498046875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1356201171875 0.1357421875 0.1357421875 0.1357421875 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.1358642578125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.135986328125 0.1361083984375 0.1361083984375 0.1361083984375 
0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.1361083984375 0.13623046875 0.13623046875 0.13623046875 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.1363525390625 0.136474609375 0.136474609375 0.136474609375 0.136474609375 0.136474609375 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.1365966796875 0.13671875 0.13671875 0.13671875 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.1368408203125 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.136962890625 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.1370849609375 0.13720703125 0.13720703125 0.13720703125 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.1373291015625 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.137451171875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1375732421875 0.1376953125 0.1376953125 0.1376953125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.1378173828125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.137939453125 0.1380615234375 0.1380615234375 0.1380615234375 0.1380615234375 0.1380615234375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.13818359375 0.1383056640625 0.1383056640625 0.1383056640625 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.138427734375 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.1385498046875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.138671875 0.1387939453125 0.1387939453125 0.1387939453125 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.138916015625 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.1390380859375 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.13916015625 0.1392822265625 0.1392822265625 0.1392822265625 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.139404296875 0.1395263671875 0.1395263671875 0.1395263671875 0.1395263671875 0.1395263671875 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1396484375 0.1397705078125 0.1397705078125 0.1397705078125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.139892578125 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.1400146484375 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.14013671875 0.1402587890625 0.1402587890625 0.1402587890625 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.140380859375 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.1405029296875 0.140625 0.140625 0.140625 0.140625 0.140625 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 0.1407470703125 
0.140869140625 0.140869140625 0.140869140625 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.1409912109375 0.14111328125 0.14111328125 0.14111328125 0.14111328125 0.14111328125 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.1412353515625 0.141357421875 0.141357421875 0.141357421875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1414794921875 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1416015625 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.1417236328125 0.141845703125 0.141845703125 0.141845703125 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.1419677734375 0.14208984375 0.14208984375 0.14208984375 0.14208984375 0.14208984375 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.1422119140625 0.142333984375 0.142333984375 0.142333984375 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.1424560546875 0.142578125 0.142578125 0.142578125 0.142578125 0.142578125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.1427001953125 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.142822265625 0.1429443359375 0.1429443359375 0.1429443359375 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.14306640625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.1431884765625 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.143310546875 0.1434326171875 0.1434326171875 0.1434326171875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1435546875 0.1436767578125 0.1436767578125 0.1436767578125 0.1436767578125 0.1436767578125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.143798828125 0.1439208984375 0.1439208984375 0.1439208984375 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.14404296875 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.1441650390625 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.144287109375 0.1444091796875 0.1444091796875 0.1444091796875 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.14453125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.1446533203125 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.144775390625 0.1448974609375 0.1448974609375 0.1448974609375 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.14501953125 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.1451416015625 0.145263671875 0.145263671875 0.145263671875 0.145263671875 0.145263671875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1453857421875 0.1455078125 0.1455078125 0.1455078125 0.1456298828125 
0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.1456298828125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.145751953125 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.1458740234375 0.14599609375 0.14599609375 0.14599609375 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.1461181640625 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.146240234375 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.1463623046875 0.146484375 0.146484375 0.146484375 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.1466064453125 0.146728515625 0.146728515625 0.146728515625 0.146728515625 0.146728515625 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.1468505859375 0.14697265625 0.14697265625 0.14697265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.1470947265625 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.147216796875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1473388671875 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1474609375 0.1475830078125 0.1475830078125 0.1475830078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.147705078125 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.1478271484375 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.14794921875 0.1480712890625 0.1480712890625 0.1480712890625 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.148193359375 0.1483154296875 0.1483154296875 0.1483154296875 0.1483154296875 0.1483154296875 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1484375 0.1485595703125 0.1485595703125 0.1485595703125 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.148681640625 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.1488037109375 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.14892578125 0.1490478515625 0.1490478515625 0.1490478515625 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.149169921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1492919921875 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1494140625 0.1495361328125 0.1495361328125 0.1495361328125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.149658203125 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.1497802734375 0.14990234375 0.14990234375 0.14990234375 0.14990234375 0.14990234375 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.1500244140625 0.150146484375 0.150146484375 0.150146484375 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 0.1502685546875 
0.1502685546875 0.1502685546875 0.1502685546875 0.150390625 0.150390625 0.150390625 0.150390625 0.150390625 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.1505126953125 0.150634765625 0.150634765625 0.150634765625 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.1507568359375 0.15087890625 0.15087890625 0.15087890625 0.15087890625 0.15087890625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.1510009765625 0.151123046875 0.151123046875 0.151123046875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1512451171875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1513671875 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.1514892578125 0.151611328125 0.151611328125 0.151611328125 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.1517333984375 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.15185546875 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.1519775390625 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.152099609375 0.1522216796875 0.1522216796875 0.1522216796875 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.15234375 0.1524658203125 0.1524658203125 0.1524658203125 0.1524658203125 0.1524658203125 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.152587890625 0.1527099609375 0.1527099609375 0.1527099609375 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.15283203125 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.1529541015625 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.153076171875 0.1531982421875 0.1531982421875 0.1531982421875 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1533203125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.1534423828125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.153564453125 0.1536865234375 0.1536865234375 0.1536865234375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.15380859375 0.1539306640625 0.1539306640625 0.1539306640625 0.1539306640625 0.1539306640625 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.154052734375 0.1541748046875 0.1541748046875 0.1541748046875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.154296875 0.1544189453125 0.1544189453125 0.1544189453125 0.1544189453125 0.1544189453125 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.154541015625 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.1546630859375 0.15478515625 0.15478515625 0.15478515625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.1549072265625 0.155029296875 0.155029296875 
0.155029296875 0.155029296875 0.155029296875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1551513671875 0.1552734375 0.1552734375 0.1552734375 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.1553955078125 0.155517578125 0.155517578125 0.155517578125 0.155517578125 0.155517578125 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.1556396484375 0.15576171875 0.15576171875 0.15576171875 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.1558837890625 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.156005859375 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.1561279296875 0.15625 0.15625 0.15625 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.1563720703125 0.156494140625 0.156494140625 0.156494140625 0.156494140625 0.156494140625 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.1566162109375 0.15673828125 0.15673828125 0.15673828125 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.1568603515625 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.156982421875 0.1571044921875 0.1571044921875 0.1571044921875 0.1571044921875 0.1571044921875 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1572265625 0.1573486328125 0.1573486328125 0.1573486328125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.157470703125 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.1575927734375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.15771484375 0.1578369140625 0.1578369140625 0.1578369140625 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.157958984375 0.1580810546875 0.1580810546875 0.1580810546875 0.1580810546875 0.1580810546875 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.158203125 0.1583251953125 0.1583251953125 0.1583251953125 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.158447265625 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.1585693359375 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.15869140625 0.1588134765625 0.1588134765625 0.1588134765625 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.158935546875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1590576171875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1591796875 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.1593017578125 0.159423828125 0.159423828125 0.159423828125 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.1595458984375 0.15966796875 0.15966796875 0.15966796875 0.15966796875 0.15966796875 0.1597900390625 
0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.1597900390625 0.159912109375 0.159912109375 0.159912109375 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.1600341796875 0.16015625 0.16015625 0.16015625 0.16015625 0.16015625 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.1602783203125 0.160400390625 0.160400390625 0.160400390625 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.1605224609375 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.16064453125 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.1607666015625 0.160888671875 0.160888671875 0.160888671875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1610107421875 0.1611328125 0.1611328125 0.1611328125 0.1611328125 0.1611328125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.1612548828125 0.161376953125 0.161376953125 0.161376953125 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.1614990234375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.16162109375 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.1617431640625 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.161865234375 0.1619873046875 0.1619873046875 0.1619873046875 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.162109375 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.1622314453125 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.162353515625 0.1624755859375 0.1624755859375 0.1624755859375 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.16259765625 0.1627197265625 0.1627197265625 0.1627197265625 0.1627197265625 0.1627197265625 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.162841796875 0.1629638671875 0.1629638671875 0.1629638671875 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1630859375 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.1632080078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.163330078125 0.1634521484375 0.1634521484375 0.1634521484375 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.16357421875 0.1636962890625 0.1636962890625 0.1636962890625 0.1636962890625 0.1636962890625 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.163818359375 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1639404296875 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.1641845703125 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.164306640625 0.1644287109375 0.1644287109375 0.1644287109375 
[test fixture data elided: one long line of half-precision float values, monotonically increasing from 0.1644287109375 to 0.259765625 with each value repeated several times — the raw contents of one of the flash_attn_kvcache_inputs_*.data files added by this patch]
0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.259765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.260009765625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.26025390625 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.260498046875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.2607421875 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.260986328125 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.26123046875 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.261474609375 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.26171875 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.261962890625 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.26220703125 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.262451171875 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.2626953125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.262939453125 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.26318359375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263427734375 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263671875 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.263916015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.26416015625 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.264404296875 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.2646484375 0.264892578125 0.264892578125 0.264892578125 
0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.264892578125 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.26513671875 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265380859375 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.265869140625 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.26611328125 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.266357421875 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.2666015625 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.266845703125 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.26708984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267333984375 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267578125 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.267822265625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.26806640625 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.268310546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.2685546875 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.268798828125 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.26904296875 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.269287109375 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.26953125 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.269775390625 0.27001953125 0.27001953125 0.27001953125 
0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.27001953125 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.270263671875 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.2705078125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.270751953125 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.27099609375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271240234375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271484375 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.271728515625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.27197265625 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.272216796875 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.2724609375 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.272705078125 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.27294921875 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.273193359375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.2734375 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.273681640625 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.27392578125 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.274169921875 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.2744140625 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.274658203125 0.27490234375 0.27490234375 0.27490234375 0.27490234375 
0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.27490234375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275146484375 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275390625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.275634765625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.27587890625 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.276123046875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.2763671875 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.276611328125 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.27685546875 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.277099609375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.27734375 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.277587890625 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.27783203125 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.278076171875 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.2783203125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.278564453125 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.27880859375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279052734375 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279296875 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.279541015625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.27978515625 0.280029296875 0.280029296875 0.280029296875 
0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.280029296875 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.2802734375 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.280517578125 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.28076171875 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.281005859375 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.28125 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.281494140625 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.28173828125 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.281982421875 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.2822265625 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.282470703125 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.28271484375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.282958984375 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283203125 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.283447265625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.28369140625 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.283935546875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.2841796875 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.284423828125 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.28466796875 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 0.284912109375 
0.284912109375 0.284912109375 0.284912109375 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.28515625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.285400390625 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.28564453125 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.285888671875 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.2861328125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.286376953125 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.28662109375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.286865234375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287109375 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.287353515625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.28759765625 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.287841796875 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.2880859375 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.288330078125 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.28857421875 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.288818359375 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.2890625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.289306640625 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.28955078125 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.289794921875 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 0.2900390625 
0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.290283203125 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.29052734375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.290771484375 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291015625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.291259765625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.29150390625 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.291748046875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.2919921875 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.292236328125 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.29248046875 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.292724609375 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.29296875 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.293212890625 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.29345703125 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.293701171875 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.2939453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.294189453125 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.29443359375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294677734375 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 0.294921875 
0.294921875 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.295166015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.29541015625 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.295654296875 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.2958984375 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.296142578125 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.29638671875 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296630859375 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.296875 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.297119140625 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.29736328125 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.297607421875 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.2978515625 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.298095703125 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.29833984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298583984375 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.298828125 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.299072265625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.29931640625 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.299560546875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.2998046875 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 0.300048828125 
0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.30029296875 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.300537109375 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.30078125 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.301025390625 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.30126953125 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.301513671875 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.3017578125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.302001953125 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.30224609375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302490234375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302734375 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.302978515625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.30322265625 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.303466796875 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.3037109375 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.303955078125 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.30419921875 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.304443359375 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.3046875 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.304931640625 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.30517578125 
0.30517578125 0.30517578125 0.30517578125 0.30517578125 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.305419921875 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.3056640625 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.305908203125 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.30615234375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306396484375 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306640625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.306884765625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.30712890625 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.307373046875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.3076171875 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.307861328125 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.30810546875 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.308349609375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.30859375 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.308837890625 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.30908203125 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.309326171875 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.3095703125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.309814453125 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.31005859375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 
0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310302734375 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310546875 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.310791015625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.31103515625 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.311279296875 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.3115234375 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.311767578125 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.31201171875 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.312255859375 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.3125 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.312744140625 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.31298828125 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.313232421875 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.3134765625 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.313720703125 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.31396484375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314208984375 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314453125 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.314697265625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.31494140625 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.315185546875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 
0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.3154296875 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.315673828125 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.31591796875 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.316162109375 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.31640625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.316650390625 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.31689453125 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.317138671875 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.3173828125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.317626953125 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.31787109375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318115234375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318359375 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.318603515625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.31884765625 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.319091796875 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.3193359375 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.319580078125 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.31982421875 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.320068359375 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.3203125 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 
0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.320556640625 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.32080078125 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.321044921875 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.3212890625 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.321533203125 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.32177734375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322021484375 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322265625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.322509765625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.32275390625 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.322998046875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.3232421875 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.323486328125 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.32373046875 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.323974609375 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.32421875 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.324462890625 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.32470703125 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.324951171875 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.3251953125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.325439453125 0.32568359375 
[raw test-data payload elided: long runs of repeated half-precision-representable values increasing monotonically from ~0.3257 to ~0.4260, with one interleaved block of small values (~0.00192 to ~0.00382); this is the wrapped content of the single-line flash-attention test .data files added by this patch — see the full files in the tree for the exact values]
0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.426025390625 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.42626953125 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.426513671875 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.427001953125 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.42724609375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427490234375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.427734375 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.42822265625 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.428466796875 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.4287109375 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.42919921875 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.429443359375 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.4296875 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.43017578125 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.430419921875 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.4306640625 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 
0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.43115234375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431396484375 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431640625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.431884765625 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.432373046875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.4326171875 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.432861328125 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.433349609375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.43359375 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.433837890625 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.434326171875 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.4345703125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.434814453125 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435302734375 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435546875 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.435791015625 0.436279296875 0.436279296875 0.436279296875 0.436279296875 
0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.436279296875 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.4365234375 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.436767578125 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.43701171875 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.4375 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.437744140625 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.43798828125 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.4384765625 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.438720703125 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.43896484375 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439453125 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.439697265625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.43994140625 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.4404296875 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.440673828125 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.44091796875 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 
0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441162109375 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.441650390625 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.44189453125 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442138671875 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.442626953125 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.44287109375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443115234375 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.443603515625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.44384765625 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444091796875 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.444580078125 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.44482421875 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445068359375 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.445556640625 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.44580078125 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 
0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.446044921875 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.4462890625 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.44677734375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447021484375 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.447265625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.44775390625 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.447998046875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.4482421875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.44873046875 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.448974609375 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44921875 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.44970703125 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.449951171875 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.4501953125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450439453125 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.450927734375 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 
0.451171875 0.451171875 0.451171875 0.451171875 0.451171875 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451416015625 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.451904296875 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.4521484375 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452392578125 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.452880859375 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453125 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453369140625 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.453857421875 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.4541015625 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454345703125 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.454833984375 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455078125 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.455322265625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.45556640625 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.4560546875 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 
0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.456298828125 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45654296875 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.45703125 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.457275390625 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.45751953125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.4580078125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.458251953125 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.45849609375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.458984375 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.459228515625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.45947265625 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.4599609375 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.460205078125 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.46044921875 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.460693359375 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.461181640625 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 
0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.46142578125 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.461669921875 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.462158203125 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.46240234375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.462646484375 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.463134765625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.46337890625 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.463623046875 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.464111328125 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.46435546875 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.464599609375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46484375 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.46533203125 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.465576171875 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.4658203125 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.46630859375 0.466552734375 0.466552734375 
0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466552734375 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.466796875 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.46728515625 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.467529296875 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.4677734375 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.46826171875 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.468505859375 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46875 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.46923828125 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.469482421875 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.4697265625 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.469970703125 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470458984375 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470703125 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.470947265625 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.471435546875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 
0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.4716796875 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.471923828125 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.472412109375 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.47265625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.472900390625 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.473388671875 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.4736328125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.473876953125 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.47412109375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474609375 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.474853515625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.47509765625 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.4755859375 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.475830078125 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.47607421875 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.4765625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 
0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.476806640625 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.47705078125 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.4775390625 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.477783203125 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.47802734375 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478515625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.478759765625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.47900390625 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479248046875 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.479736328125 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.47998046875 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480224609375 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.480712890625 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.48095703125 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481201171875 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 0.481689453125 
0.481689453125 0.481689453125 0.481689453125 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.48193359375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482177734375 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.482666015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.48291015625 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.483154296875 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.4833984375 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.48388671875 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484130859375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.484375 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.48486328125 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.485107421875 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.4853515625 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.48583984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486083984375 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.486328125 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 0.48681640625 
0.48681640625 0.48681640625 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.487060546875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.4873046875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.48779296875 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.488037109375 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.48828125 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.488525390625 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.489013671875 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.4892578125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489501953125 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.489990234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490234375 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490478515625 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.490966796875 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.4912109375 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 0.491455078125 \ No newline at end of file diff --git a/runtime/test/test_files/generate_flash_attn_ground_truth.py b/runtime/test/test_files/generate_flash_attn_ground_truth.py index 892ec145e..a2cf8f44b 100644 --- a/runtime/test/test_files/generate_flash_attn_ground_truth.py +++ 
diff --git a/runtime/test/test_files/generate_flash_attn_ground_truth.py b/runtime/test/test_files/generate_flash_attn_ground_truth.py
index 892ec145e..a2cf8f44b 100644
--- a/runtime/test/test_files/generate_flash_attn_ground_truth.py
+++ b/runtime/test/test_files/generate_flash_attn_ground_truth.py
@@ -1,24 +1,8 @@
 import torch
 import flash_attn
-from flash_attn.flash_attn_interface import _flash_attn_forward, _flash_attn_backward, flash_attn_func
-
-assert flash_attn.__version__ == "2.0.9"
-
-b = 1
-seq_len = 128
-num_heads = 3
-head_dims = 32
-causal = True
-
-
-input_len = b * seq_len * num_heads * head_dims
-q = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/30000
-k = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/40000
-v = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/50000
-
-
-out, q, k, v, out_padded, softmax_lse, S_dmask, rng_state = _flash_attn_forward(q, k, v, 0.0, 0.5, causal, False)
+from flash_attn.flash_attn_interface import _flash_attn_forward, _flash_attn_backward, flash_attn_with_kvcache
+assert flash_attn.__version__ == "2.4.2"
 
 def outputs(tensor, filename):
     to_write = torch.flatten(tensor)
@@ -27,29 +11,99 @@ def outputs(tensor, filename):
         of.write(f"{num} ")
     of.close()
 
-print("generating flash_attn fwd inputs...")
-outputs(q, "flash_attn_inputs_q.data")
-outputs(k, "flash_attn_inputs_k.data")
-outputs(v, "flash_attn_inputs_v.data")
-
-
-print("generating ground truth for flash_attn fwd output...")
-outputs(out, "flash_attn_fwd_outputs.data")
-
-
-print("generating flash_attn bwd inputs...")
-dout = torch.ones(out.shape, dtype=torch.float16, device='cuda')/32
-outputs(dout, "flash_attn_inputs_dout.data")
-
-
-dq, dk, dv = torch.empty_like(q), torch.empty_like(k), torch.empty_like(v)
-
-_flash_attn_backward(
-    dout, q, k, v, out, softmax_lse,
-    dq, dk, dv, 0, 0.5, causal
-)
-
-print("generating ground truth for flash_attn bwd output...")
-outputs(dq, "flash_attn_bwd_outputs_dq.data")
-outputs(dk, "flash_attn_bwd_outputs_dk.data")
-outputs(dv, "flash_attn_bwd_outputs_dv.data")
+def generate_flash_attn_fwd_backward_data():
+    b = 1
+    seq_len = 128
+    num_heads = 3
+    head_dims = 32
+    causal = True
+    input_len = b * seq_len * num_heads * head_dims
+    q = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/30000
+    k = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/40000
+    v = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/50000
+
+    window_size = (-1, -1)
+    out, q, k, v, out_padded, softmax_lse, S_dmask, rng_state = _flash_attn_forward(q, k, v, 0.0, 0.5, causal, window_size, None, False)
+
+    print("generating flash_attn fwd inputs...")
+    outputs(q, "flash_attn_inputs_q.data")
+    outputs(k, "flash_attn_inputs_k.data")
+    outputs(v, "flash_attn_inputs_v.data")
+
+
+    print("generating ground truth for flash_attn fwd output...")
+    outputs(out, "flash_attn_fwd_outputs.data")
+
+
+    print("generating flash_attn bwd inputs...")
+    dout = torch.ones(out.shape, dtype=torch.float16, device='cuda')/32
+    outputs(dout, "flash_attn_inputs_dout.data")
+
+
+    dq, dk, dv = torch.empty_like(q), torch.empty_like(k), torch.empty_like(v)
+
+    _flash_attn_backward(
+        dout, q, k, v, out, softmax_lse,
+        dq, dk, dv, 0, 0.5, causal, window_size, None, deterministic=True
+    )
+
+    print("generating ground truth for flash_attn bwd output...")
+    outputs(dq, "flash_attn_bwd_outputs_dq.data")
+    outputs(dk, "flash_attn_bwd_outputs_dk.data")
+    outputs(dv, "flash_attn_bwd_outputs_dv.data")
+
+
+def generate_flash_attn_kvcache_data():
+    b = 2
+    seq_len = 128
+    seq_len_q = 1
+    num_heads = 3
+    head_dims = 32
+    causal = True
+    input_len = b * seq_len_q * num_heads * head_dims
+    q = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len_q, num_heads, head_dims))/30000
+    k = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len_q, num_heads, head_dims))/40000
+    v = torch.arange(input_len, dtype=torch.float16, device='cuda').reshape((b, seq_len_q, num_heads, head_dims))/50000
+
+    cache_len = b * seq_len * num_heads * head_dims
+    kcache = torch.arange(cache_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/40000
+    vcache = torch.arange(cache_len, dtype=torch.float16, device='cuda').reshape((b, seq_len, num_heads, head_dims))/50000
+
+    outputs(kcache, "flash_attn_kvcache_inputs_kcache.data")
+    outputs(vcache, "flash_attn_kvcache_inputs_vcache.data")
+    cache_seqlens = torch.ones((b,), dtype=torch.int32, device='cuda')*64
+    window_size = (-1, -1)
+    cos = None
+    sin = None
+    cache_batch_idx = None
+    out = flash_attn_with_kvcache(
+        q,
+        kcache,
+        vcache,
+        k,
+        v,
+        cos,
+        sin,
+        cache_seqlens,
+        cache_batch_idx,
+        softmax_scale=0.5,
+        causal=causal,
+        window_size=window_size,
+        rotary_interleaved=False,
+        num_splits=1,
+    )
+
+    print("generating flash_attn kvcache inputs...")
+    outputs(q, "flash_attn_kvcache_inputs_q.data")
+    outputs(k, "flash_attn_kvcache_inputs_k.data")
+    outputs(v, "flash_attn_kvcache_inputs_v.data")
+    outputs(cache_seqlens, "flash_attn_kvcache_inputs_cache_seqlens.data")
+
+    print("generating ground truth for flash_attn kvcache output...")
+    outputs(out, "flash_attn_kvcache_outputs.data")
+    outputs(kcache, "flash_attn_kvcache_outputs_kcache.data")
+    outputs(vcache, "flash_attn_kvcache_outputs_vcache.data")
+
+if __name__ == "__main__":
+    generate_flash_attn_fwd_backward_data()
+    generate_flash_attn_kvcache_data()
diff --git a/scripts/e2e/build_and_test_e2e.sh b/scripts/e2e/build_and_test_e2e.sh
index 916db54aa..6c5adbdb7 100755
--- a/scripts/e2e/build_and_test_e2e.sh
+++ b/scripts/e2e/build_and_test_e2e.sh
@@ -13,7 +13,7 @@ bash scripts/compiler/build_and_lit_test.sh
 # build runtime
 bash scripts/runtime/build_and_test.sh --cuda --python --no-test
 # build torch_frontend
-bash scripts/frontends/torch-frontend/build_and_test.sh
+bash frontends/torch-frontend/scripts/build_and_test.sh
 
 pip3 install $ROOT_PROJ_DIR/external/AITemplate/python/dist/*.whl --force-reinstall
 pip3 install $ROOT_PROJ_DIR/compiler/build/python/dist/*.whl --force-reinstall
diff --git a/scripts/runtime/build_and_test.sh b/scripts/runtime/build_and_test.sh
index c80b18676..a65201090 100755
--- a/scripts/runtime/build_and_test.sh
+++ b/scripts/runtime/build_and_test.sh
@@ -25,10 +25,6 @@ while [[ $# -gt 1 ]]; do
       BRT_TEST=OFF
       shift
       ;;
-    --flash)
-      brt_BUILD_FLASH_ATTN=ON
-      shift
-      ;;
     *)
       echo "Invalid option: $1"
       exit 1
@@ -52,7 +48,6 @@ BRT_USE_CUDA=${BRT_USE_CUDA:-OFF}
 BRT_ENABLE_ASAN=${BRT_ENABLE_ASAN:-OFF}
 BRT_ENABLE_PYTHON_BINDINGS=${BRT_ENABLE_PYTHON_BINDINGS:-OFF}
 CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
-brt_BUILD_FLASH_ATTN=${brt_BUILD_FLASH_ATTN:-OFF}
 
 # test options
 BRT_TEST=${BRT_TEST:-ON}
@@ -69,7 +64,6 @@ cmake -GNinja \
   -DCMAKE_INSTALL_PREFIX="$BUILD_DIR/install" \
   -Dbrt_USE_CUDA=${BRT_USE_CUDA} \
   -Dbrt_USE_NCCL=${BRT_USE_NCCL} \
-  -Dbrt_BUILD_FLASH_ATTN=${brt_BUILD_FLASH_ATTN} \
   -Dbrt_ENABLE_ASAN=${BRT_ENABLE_ASAN} \
   -Dbrt_ENABLE_PYTHON_BINDINGS=${BRT_ENABLE_PYTHON_BINDINGS}
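A quick cross-check of the regenerated forward ground truth, independent of this patch: recompute the same attention with plain PyTorch on the deterministic inputs above. This is a verification sketch only; the shapes, the softmax_scale=0.5, and the causal masking are taken from generate_flash_attn_fwd_backward_data, everything else is an assumption.

    import torch

    b, seq_len, num_heads, head_dims = 1, 128, 3, 32
    n = b * seq_len * num_heads * head_dims
    q = torch.arange(n, dtype=torch.float16).reshape(b, seq_len, num_heads, head_dims) / 30000
    k = torch.arange(n, dtype=torch.float16).reshape(b, seq_len, num_heads, head_dims) / 40000
    v = torch.arange(n, dtype=torch.float16).reshape(b, seq_len, num_heads, head_dims) / 50000

    # flash-attn layout is (batch, seqlen, heads, dim); move heads forward for matmul.
    qh, kh, vh = (t.permute(0, 2, 1, 3).float() for t in (q, k, v))
    scores = 0.5 * (qh @ kh.transpose(-1, -2))          # softmax_scale = 0.5
    mask = torch.triu(torch.full((seq_len, seq_len), float("-inf")), diagonal=1)
    ref = torch.softmax(scores + mask, dim=-1) @ vh     # causal attention
    ref = ref.permute(0, 2, 1, 3).half()                # comparable to flash_attn_fwd_outputs.data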
From bac412c990454e717665e7a79f7cc58504755051 Mon Sep 17 00:00:00 2001
From: Zhekun Zhang
Date: Mon, 22 Jan 2024 20:48:06 +0000
Subject: [PATCH 2/9] add lfs libflash_attn.so

---
 runtime/test/test_files/external_libs/libflash_attn.so | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100755 runtime/test/test_files/external_libs/libflash_attn.so

diff --git a/runtime/test/test_files/external_libs/libflash_attn.so b/runtime/test/test_files/external_libs/libflash_attn.so
new file mode 100755
index 000000000..3f53bd7d4
--- /dev/null
+++ b/runtime/test/test_files/external_libs/libflash_attn.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f00cff217327552d99a2341b768389bfffbb38934e0b193d1eb77b6b0e84efcd
+size 339166176

From e3cf2001d0052891b83f52e6e5791199b3ca3fac Mon Sep 17 00:00:00 2001
From: Zhekun Zhang
Date: Tue, 23 Jan 2024 22:01:00 +0000
Subject: [PATCH 3/9] support bufferize & enable flash attn e2e tests

---
 .../HloToByreTensor/HloToByreCustom.h         |   29 +-
 .../include/byteir/Dialect/Byre/ByreOps.td    |   14 +-
 .../HloToByreTensor/HloToByreCustom.cpp       |  363 +-
 compiler/lib/Dialect/Byre/IR/ByreDialect.cpp  |   16 +
 .../BufferizableOpInterfaceImpl.cpp           |   72 +
 compiler/lib/Pipelines/ByreTensorOpt.cpp      |    6 +-
 external_libs/libs/libflash_attn.so           |    3 +
 external_libs/runtime/flash_attn/lib/alibi.h  |   84 +-
 .../runtime/flash_attn/lib/block_info.h       |   83 +-
 .../runtime/flash_attn/lib/flash_api.cu       |    5 +-
 .../runtime/flash_attn/lib/flash_bwd_kernel.h | 3420 +++++++++--------
 .../lib/flash_bwd_launch_template.h           |  743 ++--
 .../lib/flash_fwd_launch_template.h           |  730 ++--
 .../runtime/flash_attn/lib/kernel_traits.h    |  725 ++--
 .../flash_attn/lib/kernel_traits_sm90.h       |  262 +-
 .../runtime/flash_attn/lib/softmax.h          |  504 +--
 external_libs/runtime/flash_attn/lib/utils.h  |  793 ++--
 .../test_files/external_libs/libflash_attn.so |    4 +-
 runtime/test/test_files/flash_attn_bwd.mlir   |    2 +-
 runtime/test/test_files/flash_attn_fwd.mlir   |    2 +-
 scripts/prepare.sh                            |    6 +
 tests/numerical_test/main.py                  |    6 +-
 22 files changed, 4387 insertions(+), 3485 deletions(-)
 create mode 100755 external_libs/libs/libflash_attn.so

diff --git a/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h b/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h
index 34f1b66ac..34ae6cd65 100644
--- a/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h
+++ b/compiler/include/byteir/Conversion/HloToByreTensor/HloToByreCustom.h
@@ -32,34 +32,17 @@ class FuncOp;
 } // namespace func
 class Operation;
 
-// abstract struct for convert rule
-struct ByreCustomConvertRuleBase {
-  ByreCustomConvertRuleBase(){};
-  ~ByreCustomConvertRuleBase() {}
-
-  virtual llvm::StringRef getCustomLibPath(llvm::StringRef callee) {
-    return "";
-  }
-
-  virtual llvm::StringRef getApiName(llvm::StringRef callee) { return ""; }
-
-  virtual ArrayAttr getExtraArgs(mhlo::CustomCallOp op,
-                                 PatternRewriter &rewriter) {
-    return {};
-  }
+struct ByreCustomConfig {
+  std::function<llvm::StringRef(llvm::StringRef)> getCustomLibPath;
+  std::function<llvm::StringRef(llvm::StringRef)> getApiName;
+  std::function<ArrayAttr(mhlo::CustomCallOp)> getExtraArgs;
 };
 
-// convert rules for cuda custom ops
-struct CudaCustomConvertRule : public ByreCustomConvertRuleBase {
-  llvm::StringRef getCustomLibPath(llvm::StringRef callee) override;
-  llvm::StringRef getApiName(llvm::StringRef callee) override;
-  ArrayAttr getExtraArgs(mhlo::CustomCallOp op,
-                         PatternRewriter &rewriter) override;
-};
+ByreCustomConfig getCudaByreCustomConfig();
 
 // use ByreCustomConvertRuleBase to decide how to convert to byre custom op
 std::unique_ptr<OperationPass<func::FuncOp>>
-createConvertHloToByreCustomPass(ByreCustomConvertRuleBase *); +createConvertHloToByreCustomPass(const ByreCustomConfig &); } // namespace mlir diff --git a/compiler/include/byteir/Dialect/Byre/ByreOps.td b/compiler/include/byteir/Dialect/Byre/ByreOps.td index 5e860a254..92f449188 100644 --- a/compiler/include/byteir/Dialect/Byre/ByreOps.td +++ b/compiler/include/byteir/Dialect/Byre/ByreOps.td @@ -179,7 +179,8 @@ def Byre_AliasOp } def Byre_CustomOp : Byre_Op<"custom", - [HasParent<"func::FuncOp">, ByreInterface]> { + [HasParent<"func::FuncOp">, + DeclareOpInterfaceMethods]> { let summary = "compute custom operation passed by library path and api name. "; let description = [{ Example: @@ -193,13 +194,22 @@ def Byre_CustomOp : Byre_Op<"custom", StrAttr:$lib_path, StrAttr:$api_name, Variadic:$operands, - ArrayAttr:$extra_args + ArrayAttr:$extra_args, + OptionalAttr:$memory_effects ); let results = (outs Variadic:$results ); + let builders = [ + OpBuilder<(ins "StringRef":$lib_path, + "StringRef":$api_name, + "ValueRange":$inputs, + "ValueRange":$outputs, + "ArrayAttr":$extra_args)> + ]; + let extraClassDeclaration = [{ FunctionType getType(); diff --git a/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp b/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp index 6cb0f2909..7a8be358f 100644 --- a/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp +++ b/compiler/lib/Conversion/HloToByreTensor/HloToByreCustom.cpp @@ -30,64 +30,9 @@ using namespace mlir; using namespace llvm; -class ConvertHloToByreCustomPass : public ::mlir::OperationPass { -public: - using Base = ConvertHloToByreCustomPass; - - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertHloToByreCustomPass) - - ConvertHloToByreCustomPass() - : ::mlir::OperationPass( - ::mlir::TypeID::get()) {} - - ConvertHloToByreCustomPass(const ConvertHloToByreCustomPass &other) - : ::mlir::OperationPass(other) {} - - explicit ConvertHloToByreCustomPass(ByreCustomConvertRuleBase *converter) - : ::mlir::OperationPass( - ::mlir::TypeID::get()), - converter(converter) {} - - ::llvm::StringRef getDescription() const override { - return "Convert hlo ops to byre custom ops."; - } - - /// Returns the derived pass name. - static constexpr ::llvm::StringLiteral getPassName() { - return ::llvm::StringLiteral("ConvertHloToByreCustomPass"); - } - ::llvm::StringRef getName() const override { - return "ConvertHloToByreCustomPass"; - } - - /// Support isa/dyn_cast functionality for the derived pass class. - static bool classof(const ::mlir::Pass *pass) { - return pass->getTypeID() == - ::mlir::TypeID::get(); - } - - /// A clone method to create a copy of this pass. - std::unique_ptr<::mlir::Pass> clonePass() const override { - return std::make_unique( - *static_cast(this)); - } - - /// Return the dialect that must be loaded in the context before this pass. 
- void getDependentDialects(::mlir::DialectRegistry ®istry) const override { - registry.insert(); - registry.insert(); - registry.insert(); - } - - void runOnOperation() override; - -protected: - ByreCustomConvertRuleBase *converter = nullptr; -}; - namespace { constexpr StringRef getFlashAttnLibPath() { - return "external_libs/libflash_attn.so"; + return "external_libs/libs/libflash_attn.so"; } constexpr StringRef getFlashAttnFwdAPI() { return "run_flash_attn_fwd"; } constexpr StringRef getFlashAttnBwdAPI() { return "run_flash_attn_bwd"; } @@ -96,137 +41,139 @@ constexpr StringRef getFlashAttnKVCacheAPI() { } } // namespace -StringRef mlir::CudaCustomConvertRule::getCustomLibPath(StringRef callee) { - if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) { - return getFlashAttnLibPath(); - } - return ""; -} - -StringRef mlir::CudaCustomConvertRule::getApiName(StringRef callee) { - if (callee == getFlashAttnFwdName()) { - return getFlashAttnFwdAPI(); - } else if (callee == getFlashAttnBwdName()) { - return getFlashAttnBwdAPI(); - } - return ""; -} - -ArrayAttr mlir::CudaCustomConvertRule::getExtraArgs(mhlo::CustomCallOp op, - PatternRewriter &rewriter) { - SmallVector extraArgs; - auto callee = op.getCallTargetName(); - if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) { - ShapedType qShapeTy; - ShapedType kShapeTy; - ShapedType vShapeTy; - ShapedType oShapeTy; +ByreCustomConfig mlir::getCudaByreCustomConfig() { + ByreCustomConfig config; + config.getCustomLibPath = [=](StringRef callee) { + if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) { + return getFlashAttnLibPath(); + } + return StringRef(""); + }; + config.getApiName = [=](StringRef callee) { if (callee == getFlashAttnFwdName()) { - qShapeTy = op.getOperand(0).getType().dyn_cast(); - kShapeTy = op.getOperand(1).getType().dyn_cast(); - vShapeTy = op.getOperand(2).getType().dyn_cast(); - oShapeTy = op.getResult(0).getType().dyn_cast(); - } else { - qShapeTy = op.getOperand(1).getType().dyn_cast(); - kShapeTy = op.getOperand(2).getType().dyn_cast(); - vShapeTy = op.getOperand(3).getType().dyn_cast(); - oShapeTy = op.getOperand(4).getType().dyn_cast(); + return getFlashAttnFwdAPI(); + } else if (callee == getFlashAttnBwdName()) { + return getFlashAttnBwdAPI(); } - if (!qShapeTy || !qShapeTy.hasStaticShape() || !kShapeTy || - !kShapeTy.hasStaticShape() || !vShapeTy || !vShapeTy.hasStaticShape() || - !oShapeTy || !oShapeTy.hasStaticShape()) - assert(false && "unexpected flash attention shape!"); - - auto qShape = qShapeTy.getShape(); - auto kShape = kShapeTy.getShape(); - auto vShape = vShapeTy.getShape(); - auto oShape = oShapeTy.getShape(); - int64_t batchSizeQ = qShape[0]; - int64_t seqlenQ = qShape[1]; - int64_t numHeadsQ = qShape[2]; - int64_t headSizeQ = qShape[3]; - int64_t batchSizeK = kShape[0]; - int64_t seqlenK = kShape[1]; - int64_t numHeadsK = kShape[2]; - int64_t headSizeK = kShape[3]; - assert(headSizeQ == headSizeK && batchSizeQ == batchSizeK); - assert(headSizeQ % 8 == 0); - - auto roundMultiple = [](int x, int m) { return (x + m - 1) / m * m; }; - const int headSize = roundMultiple(headSizeQ, 8); - const int headSizeRounded = roundMultiple(headSize, 32); - const int seqlenQRounded = roundMultiple(seqlenQ, 128); - const int seqlenKRounded = roundMultiple(seqlenK, 128); - - uint32_t qBatchStride = qShape[1] * qShape[2] * qShape[3]; - uint32_t kBatchStride = kShape[1] * kShape[2] * kShape[3]; - uint32_t vBatchStride = vShape[1] * vShape[2] * vShape[3]; - 
uint32_t oBatchStride = oShape[1] * oShape[2] * oShape[3]; - uint32_t qRowStride = qShape[2] * qShape[3]; - uint32_t kRowStride = kShape[2] * kShape[3]; - uint32_t vRowStride = vShape[2] * vShape[3]; - uint32_t oRowStride = oShape[2] * oShape[3]; - uint32_t qHeadStride = qShape[3]; - uint32_t kHeadStride = kShape[3]; - uint32_t vHeadStride = vShape[3]; - uint32_t oHeadStride = oShape[3]; - - DictionaryAttr byteirAttrs = - op->getAttr(getCustomCallAttrName()).cast(); - if (!byteirAttrs) - assert(false && "byteir attribute not found!"); - bool causal = byteirAttrs.get("causal").cast().getValue(); - float softmaxScale = byteirAttrs.get("softmax_scale") - .cast() - .getValue() - .convertToDouble(); - float dropoutP = byteirAttrs.get("dropout_p") - .cast() - .getValue() - .convertToDouble(); - int windowSizeLeft = -1; - int windowSizeRight = -1; - // causal=true is the same as causal=false in this case - if (seqlenQ == 1) - causal = false; - if (causal) - windowSizeRight = 0; - - // extra args should match kernel api call - extraArgs.push_back(rewriter.getI64IntegerAttr(qBatchStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(kBatchStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(vBatchStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(oBatchStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(qRowStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(kRowStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(vRowStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(oRowStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(qHeadStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(kHeadStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(vHeadStride)); - extraArgs.push_back(rewriter.getI64IntegerAttr(oHeadStride)); - - extraArgs.push_back(rewriter.getI64IntegerAttr(batchSizeQ)); - extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsQ)); - extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsK)); - extraArgs.push_back(rewriter.getI64IntegerAttr(headSize)); - extraArgs.push_back(rewriter.getI64IntegerAttr(headSizeRounded)); - extraArgs.push_back(rewriter.getF32FloatAttr(softmaxScale)); - extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQ)); - extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenK)); - extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQRounded)); - extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenKRounded)); - extraArgs.push_back(rewriter.getF32FloatAttr(dropoutP)); - extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeLeft)); - extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeRight)); - return ArrayAttr::get(rewriter.getContext(), extraArgs); - } - return {}; + return StringRef(""); + }; + config.getExtraArgs = [=](mhlo::CustomCallOp op) { + SmallVector extraArgs; + auto callee = op.getCallTargetName(); + if (callee == getFlashAttnFwdName() || callee == getFlashAttnBwdName()) { + OpBuilder rewriter(op); + ShapedType qShapeTy; + ShapedType kShapeTy; + ShapedType vShapeTy; + ShapedType oShapeTy; + if (callee == getFlashAttnFwdName()) { + qShapeTy = op.getOperand(0).getType().dyn_cast(); + kShapeTy = op.getOperand(1).getType().dyn_cast(); + vShapeTy = op.getOperand(2).getType().dyn_cast(); + oShapeTy = op.getResult(0).getType().dyn_cast(); + } else { + qShapeTy = op.getOperand(1).getType().dyn_cast(); + kShapeTy = op.getOperand(2).getType().dyn_cast(); + vShapeTy = op.getOperand(3).getType().dyn_cast(); + oShapeTy = op.getOperand(4).getType().dyn_cast(); + } + if (!qShapeTy || 
!qShapeTy.hasStaticShape() || !kShapeTy || + !kShapeTy.hasStaticShape() || !vShapeTy || + !vShapeTy.hasStaticShape() || !oShapeTy || !oShapeTy.hasStaticShape()) + assert(false && "unexpected flash attention shape!"); + + auto qShape = qShapeTy.getShape(); + auto kShape = kShapeTy.getShape(); + auto vShape = vShapeTy.getShape(); + auto oShape = oShapeTy.getShape(); + int64_t batchSizeQ = qShape[0]; + int64_t seqlenQ = qShape[1]; + int64_t numHeadsQ = qShape[2]; + int64_t headSizeQ = qShape[3]; + int64_t batchSizeK = kShape[0]; + int64_t seqlenK = kShape[1]; + int64_t numHeadsK = kShape[2]; + int64_t headSizeK = kShape[3]; + assert(headSizeQ == headSizeK && batchSizeQ == batchSizeK); + assert(headSizeQ % 8 == 0); + + auto roundMultiple = [](int x, int m) { return (x + m - 1) / m * m; }; + const int headSize = roundMultiple(headSizeQ, 8); + const int headSizeRounded = roundMultiple(headSize, 32); + const int seqlenQRounded = roundMultiple(seqlenQ, 128); + const int seqlenKRounded = roundMultiple(seqlenK, 128); + + uint32_t qBatchStride = qShape[1] * qShape[2] * qShape[3]; + uint32_t kBatchStride = kShape[1] * kShape[2] * kShape[3]; + uint32_t vBatchStride = vShape[1] * vShape[2] * vShape[3]; + uint32_t oBatchStride = oShape[1] * oShape[2] * oShape[3]; + uint32_t qRowStride = qShape[2] * qShape[3]; + uint32_t kRowStride = kShape[2] * kShape[3]; + uint32_t vRowStride = vShape[2] * vShape[3]; + uint32_t oRowStride = oShape[2] * oShape[3]; + uint32_t qHeadStride = qShape[3]; + uint32_t kHeadStride = kShape[3]; + uint32_t vHeadStride = vShape[3]; + uint32_t oHeadStride = oShape[3]; + + DictionaryAttr byteirAttrs = + op->getAttr(getCustomCallAttrName()).cast(); + if (!byteirAttrs) + assert(false && "byteir attribute not found!"); + bool causal = byteirAttrs.get("causal").cast().getValue(); + float softmaxScale = byteirAttrs.get("softmax_scale") + .cast() + .getValue() + .convertToDouble(); + float dropoutP = byteirAttrs.get("dropout_p") + .cast() + .getValue() + .convertToDouble(); + int windowSizeLeft = -1; + int windowSizeRight = -1; + // causal=true is the same as causal=false in this case + if (seqlenQ == 1) + causal = false; + if (causal) + windowSizeRight = 0; + + // extra args should match kernel api call + extraArgs.push_back(rewriter.getI64IntegerAttr(qBatchStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(kBatchStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(vBatchStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(oBatchStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(qRowStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(kRowStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(vRowStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(oRowStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(qHeadStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(kHeadStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(vHeadStride)); + extraArgs.push_back(rewriter.getI64IntegerAttr(oHeadStride)); + + extraArgs.push_back(rewriter.getI64IntegerAttr(batchSizeQ)); + extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsQ)); + extraArgs.push_back(rewriter.getI64IntegerAttr(numHeadsK)); + extraArgs.push_back(rewriter.getI64IntegerAttr(headSize)); + extraArgs.push_back(rewriter.getI64IntegerAttr(headSizeRounded)); + extraArgs.push_back(rewriter.getF32FloatAttr(softmaxScale)); + extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQ)); + extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenK)); + 
extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenQRounded)); + extraArgs.push_back(rewriter.getI64IntegerAttr(seqlenKRounded)); + extraArgs.push_back(rewriter.getF32FloatAttr(dropoutP)); + extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeLeft)); + extraArgs.push_back(rewriter.getI64IntegerAttr(windowSizeRight)); + return ArrayAttr::get(rewriter.getContext(), extraArgs); + } + return ArrayAttr({}); + }; + return config; } struct ConvertCustomCallOpToByreCustom : public RewritePattern { ConvertCustomCallOpToByreCustom(MLIRContext *context, - ByreCustomConvertRuleBase *converter) + const ByreCustomConfig &converter) : RewritePattern(MatchAnyOpTypeTag(), 1, context), converter(converter) {} LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { @@ -234,42 +181,56 @@ struct ConvertCustomCallOpToByreCustom : public RewritePattern { return failure(); auto customCallOp = cast(op); auto callee = customCallOp.getCallTargetName(); - auto libPath = converter->getCustomLibPath(callee); + auto libPath = converter.getCustomLibPath(callee); if (libPath == "") return failure(); - auto apiName = converter->getApiName(callee); - auto extraArgs = converter->getExtraArgs(customCallOp, rewriter); + auto apiName = converter.getApiName(callee); + auto extraArgs = converter.getExtraArgs(customCallOp); auto newOp = rewriter.create( customCallOp.getLoc(), customCallOp.getResultTypes(), libPath, apiName, - customCallOp.getOperands(), extraArgs); + customCallOp.getOperands(), extraArgs, /*memEffects*/ ArrayAttr{}); rewriter.replaceOp(op, newOp.getResults()); return success(); } -private: - ByreCustomConvertRuleBase *converter; +protected: + ByreCustomConfig converter; }; -void ConvertHloToByreCustomPass::runOnOperation() { - // early return if no converter - if (nullptr == converter) { - return; +class ConvertHloToByreCustomPass + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertHloToByreCustomPass) + + ConvertHloToByreCustomPass(const ByreCustomConfig &rule) : converter(rule) {} + + /// Return the dialect that must be loaded in the context before this pass. 
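+ /// This pass rewrites mhlo.custom_call into byre.custom, so at least the
+ /// byre and mhlo dialects must be loaded before it runs. A minimal usage
+ /// sketch, mirroring the ByreTensorOpt pipeline change later in this
+ /// patch:
+ ///
+ ///   pm.addNestedPass<func::FuncOp>(
+ ///       createConvertHloToByreCustomPass(getCudaByreCustomConfig()));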
+ void getDependentDialects(::mlir::DialectRegistry ®istry) const override { + registry.insert(); + registry.insert(); + registry.insert(); } - MLIRContext &ctx = getContext(); - RewritePatternSet patterns(&ctx); - auto funcOp = getOperation(); + void runOnOperation() override { + MLIRContext &ctx = getContext(); + RewritePatternSet patterns(&ctx); + auto funcOp = getOperation(); - patterns.add(patterns.getContext(), - converter); - FrozenRewritePatternSet frozenPatterns(std::move(patterns)); - if (failed(applyPatternsAndFoldGreedily(funcOp, frozenPatterns))) { - signalPassFailure(); + patterns.add(patterns.getContext(), + converter); + FrozenRewritePatternSet frozenPatterns(std::move(patterns)); + if (failed(applyPatternsAndFoldGreedily(funcOp, frozenPatterns))) { + signalPassFailure(); + } } -} + +protected: + ByreCustomConfig converter; +}; std::unique_ptr> -mlir::createConvertHloToByreCustomPass(ByreCustomConvertRuleBase *converter) { +mlir::createConvertHloToByreCustomPass(const ByreCustomConfig &converter) { return std::make_unique(converter); } diff --git a/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp b/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp index 99ba25872..e7fef952d 100644 --- a/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp +++ b/compiler/lib/Dialect/Byre/IR/ByreDialect.cpp @@ -489,6 +489,22 @@ Value AliasOp::getViewSource() { return getSource(); } // CustomOp //===----------------------------------------------------------------------===/ +void CustomOp::build(OpBuilder &builder, OperationState &result, + StringRef lib_path, StringRef api_name, ValueRange inputs, + ValueRange outputs, ArrayAttr extra_args) { + SmallVector memoryEffectAttrs; + memoryEffectAttrs.append( + inputs.size(), builder.getAttr(MemoryEffect::Read)); + memoryEffectAttrs.append( + outputs.size(), builder.getAttr(MemoryEffect::Write)); + build(builder, result, TypeRange{}, lib_path, api_name, + llvm::to_vector(llvm::concat(llvm::to_vector(inputs), + llvm::to_vector(outputs))), + extra_args, builder.getArrayAttr(memoryEffectAttrs)); +} + +std::string CustomOp::getCalleeName() { return "custom"; } + LogicalResult CustomOp::verify() { return verifyOpInEntryPointFunc(this->getOperation()); } diff --git a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp index cb6d5cd0a..75d9ecc02 100644 --- a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp @@ -163,11 +163,83 @@ struct ByreComputeOpBufferization return success(); } }; + +struct ByreCustomOpBufferization + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToAllocation(Operation * /*op*/, OpResult /*opResult*/) const { + return true; + } + + bool bufferizesToMemoryRead(Operation * /*op*/, OpOperand & /*opOperand*/, + const AnalysisState & /*state*/) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation * /*op*/, OpOperand & /*opOperand*/, + const AnalysisState & /*state*/) const { + return false; + } + + AliasingOpResultList + getAliasingOpResults(Operation * /*op*/, OpOperand & /*opOperand*/, + const AnalysisState & /*state*/) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const BufferizationOptions &options) const { + SmallVector bufferOperands, bufferResults; + + for (auto &&opOperand : op->getOpOperands()) { + auto buffer = + getBufferInValidLayout(rewriter, op->getLoc(), opOperand, options); + if 
(failed(buffer)) + return failure(); + + bufferOperands.push_back(*buffer); + } + + for (auto &&opResult : op->getOpResults()) { + auto tensorType = opResult.getType().dyn_cast_or_null(); + if (!tensorType) + return failure(); + + bool dealloc = shouldDeallocateOpResult(opResult, options); + auto tensorAlloc = allocateTensorForShapedValue( + rewriter, op->getLoc(), opResult, /*escapse*/ !dealloc, options); + if (failed(tensorAlloc)) + return failure(); + + auto memrefType = + MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + Value buffer = rewriter.create( + op->getLoc(), memrefType, *tensorAlloc); + bufferResults.push_back(buffer); + } + + auto newOp = rewriter.create( + op->getLoc(), cast(op).getLibPath(), + cast(op).getApiName(), bufferOperands, bufferResults, + cast(op).getExtraArgs()); + + for (auto &&namedAttr : op->getAttrs()) { + StringRef name = namedAttr.getName(); + if (!name.startswith("bufferization.") && !newOp->hasAttr(name)) { + newOp->setAttr(name, namedAttr.getValue()); + } + } + + bufferization::replaceOpWithBufferizedValues(rewriter, op, bufferResults); + return success(); + } +}; } // namespace void mlir::byre::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, byre::ByreDialect *) { byre::ComputeOp::attachInterface(*ctx); + byre::CustomOp::attachInterface(*ctx); }); } diff --git a/compiler/lib/Pipelines/ByreTensorOpt.cpp b/compiler/lib/Pipelines/ByreTensorOpt.cpp index 25e720544..5b1f710ad 100644 --- a/compiler/lib/Pipelines/ByreTensorOpt.cpp +++ b/compiler/lib/Pipelines/ByreTensorOpt.cpp @@ -44,11 +44,9 @@ void createByreTensorOptPipelineImpl(OpPassManager &pm, std::string entryFunc, pm.addPass(createSymbolDCEPass()); pm.addPass(createCanonicalizerPass()); pm.addNestedPass( - createConvertHloToByreTensorPass(appendArgTypes)); - auto *gpuRule = new CudaCustomConvertRule(); + createConvertHloToByreCustomPass(getCudaByreCustomConfig())); pm.addNestedPass( - createConvertHloToByreCustomPass(gpuRule)); - delete gpuRule; + createConvertHloToByreTensorPass(appendArgTypes)); pm.addPass(createCanonicalizerPass()); } } // namespace diff --git a/external_libs/libs/libflash_attn.so b/external_libs/libs/libflash_attn.so new file mode 100755 index 000000000..8f0cfff40 --- /dev/null +++ b/external_libs/libs/libflash_attn.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e8e3639fa7bff088ba97ffc626cdb7fe2038b0ba36344e6a28b35a1a1ee94d +size 312538456 diff --git a/external_libs/runtime/flash_attn/lib/alibi.h b/external_libs/runtime/flash_attn/lib/alibi.h index 1afb3687d..7d44ae8db 100644 --- a/external_libs/runtime/flash_attn/lib/alibi.h +++ b/external_libs/runtime/flash_attn/lib/alibi.h @@ -2,8 +2,8 @@ #include -#include #include +#include #include "utils.h" @@ -14,49 +14,49 @@ using namespace cute; //////////////////////////////////////////////////////////////////////////////////////////////////// template -inline __device__ void apply_alibi(Tensor &tensor, - const int col_idx_offset_, - const int max_seqlen_k, - const int row_idx_offset, - const int max_seqlen_q, - const int warp_row_stride, - const float alibi_slope) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout::rank == 2, "Only support 2D Tensor"); - const int lane_id = threadIdx.x % 32; - const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; - if constexpr (Is_causal) { // Simpler, we add the same bias vector to all rows - #pragma unroll - for (int nj = 0; nj < size<1, 
1>(tensor); ++nj) { - const int col_idx_base = col_idx_offset + nj * 8; - #pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const int col_idx = col_idx_base + j; - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - tensor(mi, make_coord(j, nj)) += alibi_slope * col_idx; - } - } +inline __device__ void +apply_alibi(Tensor &tensor, const int col_idx_offset_, + const int max_seqlen_k, const int row_idx_offset, + const int max_seqlen_q, const int warp_row_stride, + const float alibi_slope) { + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout::rank == 2, "Only support 2D Tensor"); + const int lane_id = threadIdx.x % 32; + const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; + if constexpr (Is_causal) { // Simpler, we add the same bias vector to all rows +#pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; +#pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + tensor(mi, make_coord(j, nj)) += alibi_slope * col_idx; } - } else { // Bias depends on both row_idx and col_idx - #pragma unroll - for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { - const int row_idx_base = row_idx_offset + mi * warp_row_stride; - #pragma unroll - for (int i = 0; i < size<0, 0>(tensor); ++i) { - const int row_idx = row_idx_base + i * 8; - #pragma unroll - for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { - const int col_idx_base = col_idx_offset + nj * 8; - #pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const int col_idx = col_idx_base + j; - tensor(make_coord(i, mi), make_coord(j, nj)) -= alibi_slope * abs(row_idx + max_seqlen_k - max_seqlen_q - col_idx); - } - } - } + } + } + } else { // Bias depends on both row_idx and col_idx +#pragma unroll + for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { + const int row_idx_base = row_idx_offset + mi * warp_row_stride; +#pragma unroll + for (int i = 0; i < size<0, 0>(tensor); ++i) { + const int row_idx = row_idx_base + i * 8; +#pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; +#pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; + tensor(make_coord(i, mi), make_coord(j, nj)) -= + alibi_slope * + abs(row_idx + max_seqlen_k - max_seqlen_q - col_idx); + } } + } } + } } -} // namespace flash +} // namespace flash diff --git a/external_libs/runtime/flash_attn/lib/block_info.h b/external_libs/runtime/flash_attn/lib/block_info.h index 65435e51a..bcd8242eb 100644 --- a/external_libs/runtime/flash_attn/lib/block_info.h +++ b/external_libs/runtime/flash_attn/lib/block_info.h @@ -8,39 +8,58 @@ namespace flash { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct BlockInfo { - - template - __device__ BlockInfo(const Params ¶ms, const int bidb) - : sum_s_q(!Varlen || params.cu_seqlens_q == nullptr ? -1 : params.cu_seqlens_q[bidb]) - , sum_s_k(!Varlen || params.cu_seqlens_k == nullptr || !params.is_seqlens_k_cumulative ? -1 : params.cu_seqlens_k[bidb]) - , actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr ? params.seqlen_q : params.cu_seqlens_q[bidb + 1] - sum_s_q) - // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] - cu_seqlens_k[bidb]. 
- // Otherwise it's cu_seqlens_k[bidb], i.e., we use cu_seqlens_k to store the sequence lengths of K. - , seqlen_k_cache(!Varlen || params.cu_seqlens_k == nullptr ? params.seqlen_k : (params.is_seqlens_k_cumulative ? params.cu_seqlens_k[bidb + 1] - sum_s_k : params.cu_seqlens_k[bidb])) - , actual_seqlen_k(params.seqused_k ? params.seqused_k[bidb] : seqlen_k_cache + (params.knew_ptr == nullptr ? 0 : params.seqlen_knew)) - { - } - - template - inline __device__ index_t q_offset(const index_t batch_stride, const index_t row_stride, const int bidb) const { - return sum_s_q == -1 ? bidb * batch_stride : uint32_t(sum_s_q) * row_stride; - } - - template - inline __device__ index_t k_offset(const index_t batch_stride, const index_t row_stride, const int bidb) const { - return sum_s_k == -1 ? bidb * batch_stride : uint32_t(sum_s_k) * row_stride; - } - - const int sum_s_q; - const int sum_s_k; - const int actual_seqlen_q; - // We have to have seqlen_k_cache declared before actual_seqlen_k, otherwise actual_seqlen_k is set to 0. - const int seqlen_k_cache; - const int actual_seqlen_k; +template struct BlockInfo { + + template + __device__ BlockInfo(const Params ¶ms, const int bidb) + : sum_s_q(!Varlen || params.cu_seqlens_q == nullptr + ? -1 + : params.cu_seqlens_q[bidb]), + sum_s_k(!Varlen || params.cu_seqlens_k == nullptr || + !params.is_seqlens_k_cumulative + ? -1 + : params.cu_seqlens_k[bidb]), + actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr + ? params.seqlen_q + : params.cu_seqlens_q[bidb + 1] - sum_s_q) + // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] - + // cu_seqlens_k[bidb]. Otherwise it's cu_seqlens_k[bidb], i.e., we use + // cu_seqlens_k to store the sequence lengths of K. + , + seqlen_k_cache(!Varlen || params.cu_seqlens_k == nullptr + ? params.seqlen_k + : (params.is_seqlens_k_cumulative + ? params.cu_seqlens_k[bidb + 1] - sum_s_k + : params.cu_seqlens_k[bidb])), + actual_seqlen_k(params.seqused_k + ? params.seqused_k[bidb] + : seqlen_k_cache + (params.knew_ptr == nullptr + ? 0 + : params.seqlen_knew)) {} + + template + inline __device__ index_t q_offset(const index_t batch_stride, + const index_t row_stride, + const int bidb) const { + return sum_s_q == -1 ? bidb * batch_stride : uint32_t(sum_s_q) * row_stride; + } + + template + inline __device__ index_t k_offset(const index_t batch_stride, + const index_t row_stride, + const int bidb) const { + return sum_s_k == -1 ? bidb * batch_stride : uint32_t(sum_s_k) * row_stride; + } + + const int sum_s_q; + const int sum_s_k; + const int actual_seqlen_q; + // We have to have seqlen_k_cache declared before actual_seqlen_k, otherwise + // actual_seqlen_k is set to 0. 
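+ // (C++ initializes non-static members in declaration order, and the
+ // mem-initializer of actual_seqlen_k reads seqlen_k_cache, so declaring
+ // seqlen_k_cache after actual_seqlen_k would make that read see an
+ // uninitialized value.)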
+ const int seqlen_k_cache; + const int actual_seqlen_k; }; //////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace flash +} // namespace flash diff --git a/external_libs/runtime/flash_attn/lib/flash_api.cu b/external_libs/runtime/flash_attn/lib/flash_api.cu index d05b383f1..b94faf626 100644 --- a/external_libs/runtime/flash_attn/lib/flash_api.cu +++ b/external_libs/runtime/flash_attn/lib/flash_api.cu @@ -592,8 +592,9 @@ void run_flash_attn_fwd(void **tensors, void *extra_args, cudaStream_t stream) { auto window_size_right = static_cast(getIntFromVoidPtr(extra_args, pos)); - run_mha(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4], - tensors[5], tensors[6], + // tensors: q, k, v, rng_state, o, softmax_lse, softmax_sum + run_mha(tensors[0], tensors[1], tensors[2], tensors[4], tensors[5], + tensors[6], tensors[3], /*q_batch_stride*/ q_batch_stride, /*k_batch_stride*/ k_batch_stride, /*v_batch_stride*/ v_batch_stride, diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h b/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h index 21212410a..d1495749b 100644 --- a/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_kernel.h @@ -6,14 +6,14 @@ #include -#include #include +#include #include #include "block_info.h" #include "kernel_traits.h" -#include "utils.h" #include "softmax.h" +#include "utils.h" #include "alibi.h" @@ -23,1616 +23,2042 @@ using namespace cute; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -CUTE_HOST_DEVICE -auto -make_tiled_copy_B_warpcontiguousN(Copy_Atom const& copy_atom, - TiledMMA const& tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; - // Divide by 2 because right now we always use 2 for the ValLayout - constexpr int kNWarpsN = decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; - constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; - // This gives the correct layout, idk why. - // auto t = make_tile(Layout, _2>, - // Stride, _8> >{}, - // auto t = make_tile(Layout, - // Stride<_1, _64, _8> >{}, - auto t = make_tile(Layout, Int, _2>, // (8, 2, 2) or (8, 4, 2) - Stride<_1, Int, _8> >{}, // (1, 64, 8) or (1, 32, 8) - make_layout(size<2>(TileShape_MNK{}))); - // if (cute::thread0()) {printf("make_tiled_copy_B_warpcontiguousN "); print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutB_TV(), t); +template +CUTE_HOST_DEVICE auto +make_tiled_copy_B_warpcontiguousN(Copy_Atom const ©_atom, + TiledMMA const &tiled_mma) { + using TileShape_MNK = typename TiledMMA::TiledShape_MNK; + using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; + constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; + // Divide by 2 because right now we always use 2 for the ValLayout + constexpr int kNWarpsN = + decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; + constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; + // This gives the correct layout, idk why. 
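+ // (Reading the strides off the tile below: each warp owns one contiguous
+ // run of 2 * AtomShape_N values along N, and consecutive warps sit
+ // MMAStride_N apart, which is what "warp-contiguous N" means here; the
+ // commented-out tiles appear to be equivalent spellings kept for
+ // reference.)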
+ // auto t = make_tile(Layout, _2>, + // Stride, _8> >{}, + // auto t = make_tile(Layout, + // Stride<_1, _64, _8> >{}, + auto t = make_tile( + Layout, Int, _2>, // (8, 2, 2) or (8, 4, + // 2) + Stride<_1, Int, _8>>{}, // (1, 64, 8) or (1, 32, 8) + make_layout(size<2>(TileShape_MNK{}))); + // if (cute::thread0()) {printf("make_tiled_copy_B_warpcontiguousN "); + // print(t); printf("\n"); } + return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutB_TV(), t); } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -CUTE_HOST_DEVICE -auto -make_tiled_copy_C_warpcontiguousN(Copy_Atom const& copy_atom, - TiledMMA const& tiled_mma) { - using TileShape_MNK = typename TiledMMA::TiledShape_MNK; - using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; - constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; - // Divide by 2 because right now we always use 2 for the ValLayout - constexpr int kNWarpsN = decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; - constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; - auto t = make_tile(make_layout(size<0>(TileShape_MNK{})), - Layout, Int, _2>, // (8, 2, 2) or (8, 4, 2) - Stride<_1, Int, _8> >{}); // (1, 64, 8) or (1, 32, 8) - // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousN "); print(t); printf("\n"); } - return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t); +template +CUTE_HOST_DEVICE auto +make_tiled_copy_C_warpcontiguousN(Copy_Atom const ©_atom, + TiledMMA const &tiled_mma) { + using TileShape_MNK = typename TiledMMA::TiledShape_MNK; + using AtomShape_MNK = typename TiledMMA::AtomShape_MNK; + constexpr int AtomShape_N = decltype(size<1>(AtomShape_MNK{}))::value; + // Divide by 2 because right now we always use 2 for the ValLayout + constexpr int kNWarpsN = + decltype(size<1>(TileShape_MNK{}))::value / AtomShape_N / 2; + constexpr int MMAStride_N = MMA_N * AtomShape_N * 2; + auto t = make_tile( + make_layout(size<0>(TileShape_MNK{})), + Layout, Int, _2>, // (8, 2, 2) or (8, 4, + // 2) + Stride<_1, Int, _8>>{}); // (1, 64, 8) or (1, 32, 8) + // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousN "); + // print(t); printf("\n"); } + return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t); } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void dot_do_o(Tensor const &do_, Tensor const &o, - Tensor &dP_sum, const int gdP_col_stride, const float scale) { - static_assert(Layout0::rank == 3, "Only support 3D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(do_.layout() == o.layout()); - // Reshape do_ and o from (8, kBlockM / 32, kHeadDim / 64) to (kBlockM / 32, 8 * kHeadDim / 64) - // The last coordinate is the "page". 
- Tensor do_reshaped = make_tensor(do_.data(), make_layout(get<1>(do_.layout()), - make_layout(get<0>(do_.layout()), - get<2>(do_.layout())))); - Tensor o_reshaped = make_tensor(o.data(), do_reshaped.layout()); - Tensor do_fp32 = flash::convert_type(do_reshaped); - Tensor o_fp32 = flash::convert_type(o_reshaped); - #pragma unroll - for (int mi = 0; mi < size<0>(do_reshaped); ++mi) { - float dP_sum_cur = do_fp32(mi, 0) * o_fp32(mi, 0); - #pragma unroll - for (int ni = 1; ni < size<1>(do_reshaped); ni++) { - dP_sum_cur += do_fp32(mi, ni) * o_fp32(mi, ni); - } - flash::SumOp sum_op; - dP_sum_cur = flash::Allreduce::run(dP_sum_cur, sum_op) * scale; - if (threadIdx.x % THREADS_PER_ROW == 0) { - dP_sum(mi * gdP_col_stride + threadIdx.x / THREADS_PER_ROW) = dP_sum_cur; - } +template +inline __device__ void dot_do_o(Tensor const &do_, + Tensor const &o, + Tensor &dP_sum, + const int gdP_col_stride, const float scale) { + static_assert(Layout0::rank == 3, "Only support 3D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(do_.layout() == o.layout()); + // Reshape do_ and o from (8, kBlockM / 32, kHeadDim / 64) to (kBlockM / 32, 8 + // * kHeadDim / 64) The last coordinate is the "page". + Tensor do_reshaped = make_tensor( + do_.data(), + make_layout(get<1>(do_.layout()), + make_layout(get<0>(do_.layout()), get<2>(do_.layout())))); + Tensor o_reshaped = make_tensor(o.data(), do_reshaped.layout()); + Tensor do_fp32 = flash::convert_type(do_reshaped); + Tensor o_fp32 = flash::convert_type(o_reshaped); +#pragma unroll + for (int mi = 0; mi < size<0>(do_reshaped); ++mi) { + float dP_sum_cur = do_fp32(mi, 0) * o_fp32(mi, 0); +#pragma unroll + for (int ni = 1; ni < size<1>(do_reshaped); ni++) { + dP_sum_cur += do_fp32(mi, ni) * o_fp32(mi, ni); + } + flash::SumOp sum_op; + dP_sum_cur = + flash::Allreduce::run(dP_sum_cur, sum_op) * scale; + if (threadIdx.x % THREADS_PER_ROW == 0) { + dP_sum(mi * gdP_col_stride + threadIdx.x / THREADS_PER_ROW) = dP_sum_cur; } + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Just compute dot(do, o) and write the result (softmax_d) to global memory as a separate kernel. -// This is used in the case where we want to parallelize the backward across seqlen_k. -template +// Just compute dot(do, o) and write the result (softmax_d) to global memory as +// a separate kernel. This is used in the case where we want to parallelize the +// backward across seqlen_k. +template inline __device__ void compute_dot_do_o(const Params ¶ms) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. 
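- // (Grid mapping: one CTA per kBlockM tile of query rows, with blockIdx.y
- // and blockIdx.z selecting batch and head; CTAs whose tile starts at or
- // past actual_seqlen_q return immediately below.)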
- const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q) return; - - const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) - + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; - const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) - + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) - + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded; - const index_t row_offset_dpsum = (bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM; - - Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), - Shape, Int>{}, - make_stride(params.h * params.d_rounded, _1{})); - Tensor dP_sum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + row_offset_dpsum), - Shape>{}, Stride<_1>{}); - - typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - // TODO: careful, we're zeroing out dQaccum with type float4, but when - // we do atomicAdds, we use type float. The layouts are different. Check this. - typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); - - Tensor cdO = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdOcdO = gmem_thr_copy_dO.partition_S(cdO); - - // Allocate predicate tensors for k - Tensor tdOpdO = make_tensor(make_shape(size<2>(tdOgdO))); - // Set predicates for k bounds - #pragma unroll - for (int k = 0; k < size(tdOpdO); ++k) {tdOpdO(k) = get<1>(tdOcdO(0, 0, k)) < params.d;} - - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tdOcdO, tdOpdO, binfo.actual_seqlen_q - m_block * kBlockM - ); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tdOcdO, tdOpdO, binfo.actual_seqlen_q - m_block * kBlockM - ); - // By right we need to scale dP up by 1/p_dropout, but instead we don't and only scale the final - // results (dQ and dK) by 1/p_dropout. So we need to keep dP_sum scaled down by p_dropout here, - // so that (dP - dP_sum) is on the same scale. - dot_do_o(tdOrdO, tdOrO, dP_sum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); - if (Clear_dQaccum) { - // We're actually not zero'ing out all of dQaccum, but only the part that we're going to - // do atomicAdds on. 
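- // (The seqlen_k-parallel backward kernel accumulates into dQaccum with
- // atomicAdd, so the region this CTA will add into has to start from
- // zero.)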
- Tensor zero = make_fragment_like(tdQgdQaccum); - clear(zero); - cute::copy(gmem_tiled_copy_dQaccum, zero, tdQgdQaccum); - } + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q) + return; + + const index_t row_offset_do = + binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + const index_t row_offset_dq_accum = + binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, + params.h * params.d_rounded, bidb) + + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * + params.h * params.d_rounded + + bidh * params.d_rounded; + const index_t row_offset_dpsum = + (bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM; + + Tensor gdO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdQaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + Tensor dP_sum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + + row_offset_dpsum), + Shape>{}, Stride<_1>{}); + + typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + // TODO: careful, we're zeroing out dQaccum with type float4, but when + // we do atomicAdds, we use type float. The layouts are different. Check this. + typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); + + Tensor cdO = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdOcdO = gmem_thr_copy_dO.partition_S(cdO); + + // Allocate predicate tensors for k + Tensor tdOpdO = make_tensor(make_shape(size<2>(tdOgdO))); +// Set predicates for k bounds +#pragma unroll + for (int k = 0; k < size(tdOpdO); ++k) { + tdOpdO(k) = get<1>(tdOcdO(0, 0, k)) < params.d; + } + + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tdOcdO, tdOpdO, + binfo.actual_seqlen_q - m_block * kBlockM); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tdOcdO, tdOpdO, + binfo.actual_seqlen_q - m_block * kBlockM); + // By right we need to scale dP up by 1/p_dropout, but instead we don't and + // only scale the final results (dQ and dK) by 1/p_dropout. 
So we need to keep + // dP_sum scaled down by p_dropout here, so that (dP - dP_sum) is on the same + // scale. + dot_do_o( + tdOrdO, tdOrO, dP_sum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), + params.p_dropout); + if (Clear_dQaccum) { + // We're actually not zero'ing out all of dQaccum, but only the part that + // we're going to do atomicAdds on. + Tensor zero = make_fragment_like(tdQgdQaccum); + clear(zero); + cute::copy(gmem_tiled_copy_dQaccum, zero, tdQgdQaccum); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template inline __device__ void clear_dKVaccum(const Params ¶ms) { - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - const int n_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k) return; - - const index_t row_offset_dkv_accum = ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + n_block * kBlockN) * params.d_rounded; - - Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, Stride, _1>{}); - - typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); - Tensor zero = make_fragment_like(tdKgdKaccum); - clear(zero); - cute::copy(gmem_tiled_copy_dKVaccum, zero, tdKgdKaccum); - cute::copy(gmem_tiled_copy_dKVaccum, zero, tdVgdVaccum); + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + const int n_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. 
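+ // (Counterpart of Clear_dQaccum above, for the seqlen_q-parallel case:
+ // dKaccum and dVaccum receive fp32 atomicAdds from the backward kernel,
+ // so each kBlockN tile is zero-filled here first.)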
+ const int tidx = threadIdx.x; + + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) + return; + + const index_t row_offset_dkv_accum = + ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + + n_block * kBlockN) * + params.d_rounded; + + Tensor gdKaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + Tensor gdVaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + + typename Kernel_traits::GmemTiledCopydQaccum gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); + Tensor zero = make_fragment_like(tdKgdKaccum); + clear(zero); + cute::copy(gmem_tiled_copy_dKVaccum, zero, tdKgdKaccum); + cute::copy(gmem_tiled_copy_dKVaccum, zero, tdVgdVaccum); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Convert dQ from dQaccum (in float) to fp16/bf16. -// This is used in the case where we want to parallelize the backward across seqlen_k. -template +// This is used in the case where we want to parallelize the backward across +// seqlen_k. +template inline __device__ void convert_dQ(const Params ¶ms, const int nsplits) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q) return; - - const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) - + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; - const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) - + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 
0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded; - - Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); - Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), - Shape, Int>{}, - make_stride(params.h * params.d_rounded, _1{})); - - Tensor sdQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutdQ{}); - - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_S(gdQaccum); - - Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K - CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); - - Tensor tdQrdQaccum = make_fragment_like(tdQgdQaccum); - clear(acc_dq); - for (int s = 0; s < nsplits; ++s) { - cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, tdQrdQaccum); - #pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) += tdQrdQaccum(i); } - tdQgdQaccum.data() = tdQgdQaccum.data() + params.dq_accum_split_stride; + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q) + return; + + const index_t row_offset_dq = + binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; + const index_t row_offset_dq_accum = + binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, + params.h * params.d_rounded, bidb) + + (m_block * kBlockM + (params.cu_seqlens_q == nullptr ? 
0 : 128 * bidb)) * + params.h * params.d_rounded + + bidh * params.d_rounded; + + Tensor gdQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + Tensor gdQaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + + Tensor sdQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutdQ{}); + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto smem_tiled_copy_dQ = + make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQsdQ = + smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + Tensor tdQsdQ = + gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_S(gdQaccum); + + Tensor acc_dq = partition_fragment_C( + tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K + CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); + + Tensor tdQrdQaccum = make_fragment_like(tdQgdQaccum); + clear(acc_dq); + for (int s = 0; s < nsplits; ++s) { + cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, tdQrdQaccum); +#pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { + acc_dq(i) += tdQrdQaccum(i); } - #pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - __syncthreads(); - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - - Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); - Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); - #pragma unroll - for (int k = 0; k < size(tdQpdQ); ++k) { tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, tdQpdQ, binfo.actual_seqlen_q - m_block * kBlockM - ); + tdQgdQaccum.data() = tdQgdQaccum.data() + params.dq_accum_split_stride; + } +#pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { + acc_dq(i) *= params.scale_softmax_rp_dropout; + } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + Tensor taccdQrdQ = + smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); + __syncthreads(); + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + + Tensor cdQ = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); + Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); +#pragma unroll + for (int k = 0; k < size(tdQpdQ); ++k) { + tdQpdQ(k) = get<1>(tdQcdQ(0, 0, 
k)) < params.d; + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, + tdQpdQ, + binfo.actual_seqlen_q - m_block * kBlockM); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Convert dK and dV from dKaccum and dVaccum (in float) to fp16/bf16. -// This is used in the case where we want to parallelize the backward across seqlen_q. -template +// This is used in the case where we want to parallelize the backward across +// seqlen_q. +template inline __device__ void convert_dKV(const Params ¶ms) { - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - const int n_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k) return; - - const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) - + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) - + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - const index_t row_offset_dkv_accum = ((bidb * params.h_k + bidh) * params.seqlen_k_rounded - + n_block * kBlockN) * params.d_rounded; - - Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, - Stride, _1>{}); - Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, - Stride, _1>{}); - - Tensor sdK = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutdKV{}); - Tensor sdV = make_tensor(sdK.data() + size(sdK), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + const int n_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + // The thread index. 
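+ // (Epilogue: load the fp32 kBlockN x kHeadDim accumulator tiles, fold in
+ // the deferred scaling (scale_softmax_rp_dropout for dK, rp_dropout for
+ // dV), convert to fp16/bf16, and stage the result through shared memory
+ // on the way out to dk_ptr and dv_ptr.)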
+ const int tidx = threadIdx.x; + + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) + return; + + const index_t row_offset_dk = + binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = + binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + const index_t row_offset_dkv_accum = + ((bidb * params.h_k + bidh) * params.seqlen_k_rounded + + n_block * kBlockN) * + params.d_rounded; + + Tensor gdK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + Tensor gdKaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + Tensor gdVaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + + Tensor sdK = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutdKV{}); + Tensor sdV = + make_tensor(sdK.data() + size(sdK), + typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd + gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto smem_tiled_copy_dKV = make_tiled_copy_C( + typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); + auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor taccdKsdK = + smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor taccdVsdV = + smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + Tensor tdKsdK = + gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVsdV = + gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_S(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_S(gdVaccum); + + Tensor acc_dk = partition_fragment_C( + tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + Tensor acc_dv = partition_fragment_C( + tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + CUTE_STATIC_ASSERT_V(size(acc_dk) == size(tdKgdKaccum)); + CUTE_STATIC_ASSERT_V(size(acc_dv) == size(tdVgdVaccum)); + + Tensor tdKrdKaccum = make_fragment_like(tdKgdKaccum); + Tensor tdVrdVaccum = make_fragment_like(tdVgdVaccum); + cute::copy(gmem_tiled_copy_dKVaccum, tdKgdKaccum, tdKrdKaccum); + cute::copy(gmem_tiled_copy_dKVaccum, tdVgdVaccum, tdVrdVaccum); +#pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { + acc_dk(i) = tdKrdKaccum(i) * params.scale_softmax_rp_dropout; + } +#pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { + acc_dv(i) = tdVrdVaccum(i) * params.rp_dropout; + } + // Convert acc_dk from fp32 to fp16 + 
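// The two loops above fold the rescaling into the accumulators before
// narrowing: dK takes scale_softmax_rp_dropout (the softmax scale divided by
// the dropout keep probability), while dV only takes rp_dropout. A minimal
// standalone sketch of that rescale-and-narrow step follows; the kernel name
// and flat indexing are illustrative only, not part of this patch, and the
// real code routes the same per-element work through CuTe fragments and
// tiled copies.
#include <cuda_fp16.h>

__global__ void rescale_and_narrow(const float *accum, __half *out,
                                   float scale, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // Same shape as: acc_dk(i) = tdKrdKaccum(i) * params.scale_softmax_rp_dropout;
    out[i] = __float2half(accum[i] * scale);
  }
}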
Tensor rdK = flash::convert_type(acc_dk); + Tensor rdV = flash::convert_type(acc_dv); + Tensor taccdKrdK = + smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdVrdV = + smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); + cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); + __syncthreads(); + Tensor tdKrdK = make_tensor(shape(tdKgdK)); + Tensor tdVrdV = make_tensor(shape(tdVgdV)); + cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); + cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); + + Tensor cdKV = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); + Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); +#pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { + tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_dKV, tdKrdK, tdKgdK, + tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); + flash::copy(gmem_tiled_copy_dKV, tdVrdV, tdVgdV, + tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); +} - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto smem_tiled_copy_dKV = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); - auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor taccdKsdK = smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor taccdVsdV = smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) +//////////////////////////////////////////////////////////////////////////////////////////////////// - Tensor tdKsdK = gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVsdV = gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_S(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_S(gdVaccum); - - Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - CUTE_STATIC_ASSERT_V(size(acc_dk) == size(tdKgdKaccum)); - CUTE_STATIC_ASSERT_V(size(acc_dv) == size(tdVgdVaccum)); - - Tensor tdKrdKaccum = make_fragment_like(tdKgdKaccum); - Tensor tdVrdVaccum = make_fragment_like(tdVgdVaccum); - cute::copy(gmem_tiled_copy_dKVaccum, tdKgdKaccum, tdKrdKaccum); - cute::copy(gmem_tiled_copy_dKVaccum, tdVgdVaccum, tdVrdVaccum); - #pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { - acc_dk(i) = tdKrdKaccum(i) * params.scale_softmax_rp_dropout; +template +inline __device__ void +compute_dq_dk_dv_1colblock(const Params ¶ms, const int bidb, const int bidh, + const int n_block) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. 
+ const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + // constexpr int kNWarps = Kernel_traits::kNWarps; + constexpr int MMA_N_SdP = + kBlockN / + decltype(size<1>( + typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; + constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; + constexpr bool Double_buffer = !Kernel_traits::No_double_buffer; + + const BlockInfo binfo(params, bidb); + if (n_block * kBlockN >= binfo.actual_seqlen_k) + return; + + int m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM); + if (Is_local) { + m_block_max = std::min( + m_block_max, + cute::ceil_div((n_block + 1) * kBlockN + binfo.actual_seqlen_q - + binfo.actual_seqlen_k + params.window_size_left, + kBlockM)); + } + + const index_t row_offset_q = + binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.q_row_stride + + bidh * params.q_head_stride; + const index_t row_offset_k = + binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + + n_block * kBlockN * params.k_row_stride + + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = + binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + + n_block * kBlockN * params.v_row_stride + + (bidh / params.h_h_k_ratio) * params.v_head_stride; + const index_t row_offset_do = + binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.do_row_stride + + bidh * params.do_head_stride; + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.o_row_stride + + bidh * params.o_head_stride; + const index_t row_offset_dq = + binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + + (m_block_max - 1) * kBlockM * params.dq_row_stride + + bidh * params.dq_head_stride; + const index_t row_offset_dq_accum = + binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, + params.h * params.d_rounded, bidb) + + ((m_block_max - 1) * kBlockM + + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * + params.h * params.d_rounded + + bidh * params.d_rounded + // If deterministic, each thread block will do atomicAdd to a different + // dQ_accum buffer. + + (!params.deterministic ? 
0 : blockIdx.x * params.dq_accum_split_stride); + const index_t row_offset_lse = + (bidb * params.h + bidh) * params.seqlen_q + (m_block_max - 1) * kBlockM; + const index_t row_offset_dpsum = + (bidb * params.h + bidh) * params.seqlen_q_rounded + + (m_block_max - 1) * kBlockM; + + Tensor gQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + Tensor gV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + Tensor gdO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + Tensor gdQaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + + row_offset_dq_accum), + Shape, Int>{}, + make_stride(params.h * params.d_rounded, _1{})); + Tensor gLSE = make_tensor( + make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + + row_offset_lse), + Shape>{}, Stride<_1>{}); + Tensor gdPsum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + + row_offset_dpsum), + Shape>{}, Stride<_1>{}); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sQt = + make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sQtNoSwizzle = make_tensor( + sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + // Double buffer for sQ + Tensor sdO = make_tensor(sQ.data() + (Double_buffer ? 2 : 1) * size(sQ), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sdOt = make_tensor(sdO.data(), + typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sdOtransposedNoSwizzle = make_tensor( + sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sK = make_tensor(sdO.data() + size(sdO), + typename Kernel_traits::SmemLayoutKV{}); + Tensor sV = + make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); + Tensor sKt = + make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); + Tensor sKtNoSwizzle = make_tensor( + sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); + Tensor sdS = make_tensor(!Kernel_traits::Is_V_in_regs ? 
sV.data() + size(sV) + : sK.data() + size(sK), + typename Kernel_traits::SmemLayoutPdS{}); + Tensor sdSt = make_tensor(sdS.data(), + typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sdStNoSwizzle = make_tensor( + sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sP = make_tensor(sdS.data() + size(sdS), + typename Kernel_traits::SmemLayoutPdS{}); + Tensor sPt = + make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sPtNoSwizzle = make_tensor( + sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + // sP and sdQ share the same memory so be careful + Tensor sdQ = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutdQ{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + using GmemTiledCopydO = + std::conditional_t; + GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + using GmemLayoutAtomdQaccum = + std::conditional_t; + GmemLayoutAtomdQaccum gmem_tiled_copy_dQaccum; + auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + Tensor tdQsdQ = + gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); + // if (cute::thread0()) { print(tdQgdQaccum.layout()); printf("\n"); } + // __syncthreads(); + // if (blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 && tidx < 64) { + // printf("tidx = %d, tdQgdQaccum = 0x%p\n", tidx, tdQgdQaccum.data()); + // } + + typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; + auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); + Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) + Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) + Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); + Tensor tdKrdSt = + thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdKrQt = + thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) + Tensor tdVrPt = + thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdVrdO = thr_mma_dkv.partition_fragment_B( + sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); + Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) + Tensor tdQrKt = + thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) + + Tensor acc_dk = partition_fragment_C( + 
tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + Tensor acc_dv = partition_fragment_C( + tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_QdO = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); + Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); + Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); + + // auto smem_thr_copy_KV = make_tiled_copy_B(typename + // Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp).get_thread_slice(tidx); + auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN( + typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_KV.partition_S(sK); + // if (cute::thread(0, 0) && n_block == 0) { printf("sK layout: "); + // print(sK.layout()); printf("\n"); } if (cute::thread(0, 0) && n_block == 0) + // { print(tSsK.layout()); printf("\n"); } + Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); + + // Partition sP and sdS to match the accumulator partitioning + // This has to be tiled_mma_sdp, not tiled_mma_dkv + // auto smem_thr_copy_PdS = make_tiled_copy_C(typename + // Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp).get_thread_slice(tidx); + auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN( + typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); + auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); + Tensor tPsP = + smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) + // if (cute::thread(0, 0) && n_block == 0) { printf("sP layout: "); + // print(sP.layout()); printf("\n"); } if (cute::thread(0, 0) && n_block == 0) + // { print(tPsP.layout()); printf("\n"); } if (n_block == 0 && blockIdx.x == 0 + // && blockIdx.y == 0 && tidx < 64) { + // printf("tidx=%d, tPsP = 0x%p\n", tidx, tPsP.data()); + // } + Tensor tdSsdS = + smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + auto smem_tiled_copy_PdSt = make_tiled_copy_A( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); + Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); + Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); + + auto smem_tiled_copy_QdOt = make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); + Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); + Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); + + auto smem_tiled_copy_dS = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); + auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); + Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); + + auto smem_tiled_copy_Kt = make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); + auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); + Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); + + auto smem_tiled_copy_dQ = + make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQsdQ = + smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // + // PREDICATES + // + + Tensor cQ = make_identity_tensor( + make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = 
make_identity_tensor( + make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + Tensor tQcQ = gmem_thr_copy_QKV.partition_D(cQ); + Tensor tKVcKV = gmem_thr_copy_QKV.partition_D(cKV); + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { + tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } - #pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { - acc_dv(i) = tdVrdVaccum(i) * params.rp_dropout; +#pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { + tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } - // Convert acc_dk from fp32 to fp16 - Tensor rdK = flash::convert_type(acc_dk); - Tensor rdV = flash::convert_type(acc_dv); - Tensor taccdKrdK = smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdVrdV = smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); - cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); - __syncthreads(); + } + + // Prologue + + // We'll advance gdQ and gdQaccum before the 1st read/write. + tdQgdQ.data() = tdQgdQ.data() + kBlockM * params.dq_row_stride; + tdQgdQaccum.data() = + tdQgdQaccum.data() + kBlockM * params.h * params.d_rounded; + + int m_block = m_block_max - 1; + int m_block_min = + (!Is_causal && !Is_local) + ? 0 + : std::max(0, (n_block * kBlockN + binfo.actual_seqlen_q - + binfo.actual_seqlen_k - params.window_size_right) / + kBlockM); + // If not local, we're guaranteed that m_block_min <= m_block: + // We checked earlier that n_block * kBlockN < actual_seqlen_k, so in the + // causal case, n_block * kBlockN + binfo.actual_seqlen_q - + // binfo.actual_seqlen_k < actual_seqlen_q. So m_block_min <= (actual_seqlen_q + // - 1) / kBlockM. Recall that m_block_max = + // cute::ceil_div(binfo.actual_seqlen_q, kBlockM) = (actual_seqlen_q + kBlockM + // - 1) / kBlockM. So m_block_m - 1 = (actual_seqlen_q - 1) / kBlockM. We + // conclude that m_block_min <= m_block, so we will always have at least 1 + // iteration of the for loop. However, if local, then this possible to have + // some blocks of K & V not attending to any query. We might need to exit + // early and write 0 to dK and dV for those blocks. Otherwise we get wrong + // result for the case where we don't enter the for loop. And we might read + // OOB elements from gQ and gdO. 
This also covers the case where + // actual_seqlen_q == 0 + if ((Is_local || !Is_even_MN) && m_block < m_block_min) { + const index_t row_offset_dk = + binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = + binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + Tensor gdK = + make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = + make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); Tensor tdKrdK = make_tensor(shape(tdKgdK)); Tensor tdVrdV = make_tensor(shape(tdVgdV)); - cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); - cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); - - Tensor cdKV = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + clear(tdKrdK); + clear(tdVrdV); + Tensor cdKV = make_identity_tensor(make_shape( + size<0>(gdK), size<1>(gdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); - #pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } +#pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { + tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; + } // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - flash::copy( - gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -inline __device__ void compute_dq_dk_dv_1colblock(const Params ¶ms, const int bidb, const int bidh, const int n_block) { + flash::copy( + gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); + flash::copy( + gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); + return; + } + + if (Double_buffer && m_block % 2 == 1) { // Double buffer for sQ + tQsQ.data() = tQsQ.data() + size(sQ); + tSsQ.data() = tSsQ.data() + size(sQ); + tdKsQt.data() = tdKsQt.data() + size(sQ); + } + + if ((!Is_first && !Seq_parallel) || params.deterministic) { + __syncthreads(); + } - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; + if (Kernel_traits::Is_V_in_regs) { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + flash::cp_async_fence(); + } - // Shared memory. 
- extern __shared__ char smem_[]; + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + if (!Is_first) { + // Clear the smem tiles to account for predicated off loads + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + } else { + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + } + flash::copy( + gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + + Tensor caccS = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) + Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) + static_assert(decltype(size<0>(taccScS))::value == 4); + // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. + Tensor taccScS_row = + logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); + Tensor lse = make_tensor( + Shape>{}); +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccScS_row(mi)); + lse(mi) = Is_even_MN || row < binfo.actual_seqlen_q - m_block * kBlockM + ? gLSE(row) + : INFINITY; + } + // We want LSE = inf if the row is OOB. In that case Q would be zero, K would + // be zero, and scores would be zero. With LSE = 0, probs will be all 1's, and + // when we multiply with V (which would be zero), we're fine. However, with + // ALiBi, we might modify these scores, and probs can become NaN. Instead if + // we set LSE = inf for OOB rows, probs are always 0. + + // Tensor tKrK = make_fragment_like(tKsK); + // // cute::copy(gmem_tiled_copy_QKV, tKgK(_, _, _, 0), tKrK); + // cute::copy(gmem_tiled_copy_QKV, tKgK, tKrK); + // // if (cute::thread(1, 0)) { print(tKrK); } + + flash::copy( + gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + if (!Kernel_traits::Is_V_in_regs) { + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + } + flash::cp_async_fence(); + + // if (cute::thread0()) { print(tdOgdO.layout()); printf("\n"); print(tdOrdO); + // print(tdOrO); } + if (Is_first) { + cute::copy(tdOrdO, tdOsdO); + dot_do_o( + tdOrdO, tdOrO, gdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), + params.p_dropout); + } - // The thread index. - const int tidx = threadIdx.x; + if (Kernel_traits::Is_V_in_regs) { + cute::cp_async_wait<1>(); + __syncthreads(); + Tensor tdPrV_copy_view = smem_thr_copy_KV.retile_D(tdPrV); + CUTE_STATIC_ASSERT_V(size<1>(tdPsV) == size<1>(tdPrV_copy_view)); // M + cute::copy(smem_tiled_copy_KV, tdPsV, tdPrV_copy_view); + } + + auto seed = params.rng_state[0]; + auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; + + clear(acc_dv); + clear(acc_dk); + + float alibi_slope = + !Has_alibi ? 
0.0f + : reinterpret_cast(params.alibi_slopes_ptr) + [bidb * params.alibi_slopes_batch_stride + bidh] / + params.scale_softmax; + + for (; m_block >= m_block_min; --m_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma_sdp, + Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + clear(acc_s); + cute::cp_async_wait<0>(); + __syncthreads(); - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - // constexpr int kNWarps = Kernel_traits::kNWarps; - constexpr int MMA_N_SdP = kBlockN / decltype(size<1>(typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; - constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; - constexpr bool Double_buffer = !Kernel_traits::No_double_buffer; + Tensor dP_sum = make_fragment_like(lse); +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + dP_sum(mi) = gdPsum(get<0>(taccScS_row(mi))); + } - const BlockInfo binfo(params, bidb); - if (n_block * kBlockN >= binfo.actual_seqlen_k) return; + // if (cute::thread0()) { print(sK); } + // Tensor tSrK_copy_view = smem_thr_copy_KV.retile_D(tSrK); + // #pragma unroll + // for (int k = 0; k < size<2>(tSrK_copy_view); ++k) { + // cute::copy(smem_tiled_copy_KV, tSsK(_, _, k), tSrK_copy_view(_, _, + // k)); + // } + // if (cute::thread0()) { print(tSrK); } + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, + smem_thr_copy_KV); + + // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + // if (cute::thread(32, 0)) { print(scores); } + + if (Has_alibi) { + flash::apply_alibi( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, AtomLayoutMS * 16, alibi_slope); + } - int m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM); - if (Is_local) { - m_block_max = std::min(m_block_max, cute::ceil_div((n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k + params.window_size_left, kBlockM)); + // TD [2023-07-29]: I was thinking that we don't need to mask out the + // elements beyond actual_seqlen_k, because acc_s would be some finite value + // for those indices. In the end when we multiply with K to get dQ, the + // corresponding values of K would be 0, so the result would still be + // correct. However, it's possible that the values in acc_s are so large + // that they overflow when we multiply with dP and convert to fp16, + // resulting in Inf in dS and NaNs in dQ. So we need to mask out the + // elements beyond actual_seqlen_k. + if (!Is_causal && !Is_local) { + if (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k) { + flash::apply_mask(scores, binfo.actual_seqlen_k, + n_block * kBlockN + + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16); + } + } else if (Is_causal) { + // Putting this causal masking right after acc_s is *much* slower for some + // reason. TD [2023-08-16]: We need the 2nd condition because if seqlen_q + // is long and seqlen_k is short (e.g., 256 and 2), the 2nd block of + // seqlen_q (from 128 to 255), we're not doing causal masking. But we + // still want to mask out elements beyond actual_seqlen_k. 
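// Spelled out as a host-side helper (illustrative only, not part of the
// kernel): with the bottom-right-aligned causal rule "row i sees column j iff
// j <= i + seqlen_k - seqlen_q", a key tile needs masking whenever its
// rightmost column can exceed what the tile's first query row may attend to.
// The branch below tests exactly this inequality.
inline bool tile_touches_causal_boundary(int m_block, int n_block, int kBlockM,
                                         int kBlockN, int seqlen_q,
                                         int seqlen_k) {
  return m_block * kBlockM < (n_block + 1) * kBlockN + seqlen_q - seqlen_k;
}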
+ if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - + binfo.actual_seqlen_k || + (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { + flash::apply_mask_causal( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, + // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % + // AtomLayoutMS * 16 + (tidx % 32) / 4, + AtomLayoutMS * 16); + } + } else if (Is_local) { + if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - + binfo.actual_seqlen_k - + params.window_size_right || + (m_block + 1) * kBlockM >= n_block * kBlockN + binfo.actual_seqlen_q - + binfo.actual_seqlen_k + + params.window_size_left || + (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { + flash::apply_mask_local( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, AtomLayoutMS * 16, params.window_size_left, + params.window_size_right); + } } - const index_t row_offset_q = binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) - + (m_block_max - 1) * kBlockM * params.q_row_stride + bidh * params.q_head_stride; - const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) - + n_block * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride; - const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) - + n_block * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride; - const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) - + (m_block_max - 1) * kBlockM * params.do_row_stride + bidh * params.do_head_stride; - const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) - + (m_block_max - 1) * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) - + (m_block_max - 1) * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; - const index_t row_offset_dq_accum = binfo.q_offset(params.seqlen_q_rounded * params.h * params.d_rounded, params.h * params.d_rounded, bidb) - + ((m_block_max - 1) * kBlockM + (params.cu_seqlens_q == nullptr ? 0 : 128 * bidb)) * params.h * params.d_rounded + bidh * params.d_rounded - // If deterministic, each thread block will do atomicAdd to a different dQ_accum buffer. - + (!params.deterministic ? 
0 : blockIdx.x * params.dq_accum_split_stride); - const index_t row_offset_lse = (bidb * params.h + bidh) * params.seqlen_q - + (m_block_max - 1) * kBlockM; - const index_t row_offset_dpsum = (bidb * params.h + bidh) * params.seqlen_q_rounded - + (m_block_max - 1) * kBlockM; - - Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), - Shape, Int>{}, - make_stride(params.q_row_stride, _1{})); - Tensor gK = make_tensor(make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), - Shape, Int>{}, - make_stride(params.k_row_stride, _1{})); - Tensor gV = make_tensor(make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), - Shape, Int>{}, - make_stride(params.v_row_stride, _1{})); - Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); - Tensor gdQaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_accum_ptr) + row_offset_dq_accum), - Shape, Int>{}, - make_stride(params.h * params.d_rounded, _1{})); - Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + row_offset_lse), - Shape>{}, Stride<_1>{}); - Tensor gdPsum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dsoftmax_sum) + row_offset_dpsum), - Shape>{}, Stride<_1>{}); - - Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sQt = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sQtNoSwizzle = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - // Double buffer for sQ - Tensor sdO = make_tensor(sQ.data() + (Double_buffer ? 2 : 1) * size(sQ), typename Kernel_traits::SmemLayoutQdO{}); - Tensor sdOt = make_tensor(sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sdOtransposedNoSwizzle = make_tensor(sdO.data(), - typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sK = make_tensor(sdO.data() + size(sdO), typename Kernel_traits::SmemLayoutKV{}); - Tensor sV = make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{}); - Tensor sKt = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); - Tensor sKtNoSwizzle = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); - Tensor sdS = make_tensor(!Kernel_traits::Is_V_in_regs ? 
sV.data() + size(sV) : sK.data() + size(sK), - typename Kernel_traits::SmemLayoutPdS{}); - Tensor sdSt = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sdStNoSwizzle = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sP = make_tensor(sdS.data() + size(sdS), typename Kernel_traits::SmemLayoutPdS{}); - Tensor sPt = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sPtNoSwizzle = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - // sP and sdQ share the same memory so be careful - Tensor sdQ = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutdQ{}); - - typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; - auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); - using GmemTiledCopydO = std::conditional_t< - Is_first, - typename Kernel_traits::GmemTiledCopydO, - typename Kernel_traits::GmemTiledCopyQKV - >; - GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - using GmemLayoutAtomdQaccum = std::conditional_t< - !Seq_parallel, - typename Kernel_traits::GmemTiledCopydQaccum, - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd - >; - GmemLayoutAtomdQaccum gmem_tiled_copy_dQaccum; - auto gmem_thr_copy_dQaccum = gmem_tiled_copy_dQaccum.get_thread_slice(tidx); - - Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); - Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) - Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); - Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) - Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); - Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); - Tensor tdQgdQaccum = gmem_thr_copy_dQaccum.partition_D(gdQaccum); - // if (cute::thread0()) { print(tdQgdQaccum.layout()); printf("\n"); } + // if (cute::thread(32, 0)) { print(scores); } + // Compute the exponential value. + flash::scale_apply_exp2(scores, lse, + params.scale_softmax_log2); + if (Is_dropout) { + int warp_id = tidx / 32; + int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; + // Need col to be multiples of 32, since we're doing dropout with block of + // 16 x 32 + static_assert(MMA_N_SdP % 2 == 0); + int block_col_idx = + n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); + Tensor scores_dropped = make_tensor( + scores.data(), + flash::convert_layout_rowcol_Aregs( + scores.layout())); + flash::apply_dropout( + scores_dropped, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, AtomLayoutMS); + } + // Convert scores from fp32 to fp16/bf16 + Tensor rP = !Is_dropout ? flash::convert_type(scores) + : flash::convert_type_relu(scores); + // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using + // m16n8k8. 
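// The backward pass never stores the probability matrix; scale_apply_exp2
// above rebuilds it row by row from the raw scores and the logsumexp saved by
// the forward pass. A hedged scalar sketch of that identity, with
// illustrative names (the kernel works in base 2 via scale_softmax_log2 for
// speed; the natural-exp form is shown here):
#include <cmath>

// P(i, j) = exp(scale * S(i, j) - lse(i)); the row then sums to 1 without a
// second max/sum pass over the scores.
void recompute_probs_row(const float *scores_row, float lse_row, float scale,
                         float *probs_row, int seqlen_k) {
  for (int j = 0; j < seqlen_k; ++j) {
    probs_row[j] = std::exp(scale * scores_row[j] - lse_row);
  }
}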
+ Tensor tPrP = make_tensor( + rP.data(), + flash::convert_layout_rowcol_Aregs( + rP.layout())); + Tensor tPaP = + smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); + // if (cute::thread0()) { print(tPaP); } // __syncthreads(); - // if (blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 && tidx < 64) { - // printf("tidx = %d, tdQgdQaccum = 0x%p\n", tidx, tdQgdQaccum.data()); + // if (cute::thread0()) { print(sP); } + + Tensor acc_dp = partition_fragment_C( + tiled_mma_sdp, + Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA + + clear(acc_dp); + // Tensor acc_dp_reshaped = make_tensor(acc_dp.data(), + // flash::convert_layout_acc_rowcol(acc_dp.layout())); #pragma unroll for + // (int mi = 0; mi < size<0>(acc_dp_reshaped); ++mi) { + // #pragma unroll + // for (int ni = 0; ni < size<1>(acc_dp_reshaped); ++ni) { + // acc_dp_reshaped(mi, ni) = -dP_sum(mi); + // } // } - typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; - auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); - Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) - Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) - Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) - Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) - - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); - Tensor tdKrdSt = thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdKrQt = thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) - Tensor tdVrPt = thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdVrdO = thr_mma_dkv.partition_fragment_B(sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); - Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) - Tensor tdQrKt = thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) - - Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - - // - // Copy Atom retiling - // - - auto smem_tiled_copy_QdO = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); - Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); - Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); - - // auto smem_thr_copy_KV = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp).get_thread_slice(tidx); - auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); - Tensor tSsK = smem_thr_copy_KV.partition_S(sK); - // if (cute::thread(0, 0) && n_block == 0) { printf("sK layout: "); print(sK.layout()); printf("\n"); } - // if (cute::thread(0, 0) && n_block == 0) { print(tSsK.layout()); printf("\n"); } - Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); - - // Partition sP and sdS to match the accumulator partitioning - // This has to be tiled_mma_sdp, not tiled_mma_dkv - // auto 
smem_thr_copy_PdS = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp).get_thread_slice(tidx); - auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); - auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); - Tensor tPsP = smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) - // if (cute::thread(0, 0) && n_block == 0) { printf("sP layout: "); print(sP.layout()); printf("\n"); } - // if (cute::thread(0, 0) && n_block == 0) { print(tPsP.layout()); printf("\n"); } - // if (n_block == 0 && blockIdx.x == 0 && blockIdx.y == 0 && tidx < 64) { - // printf("tidx=%d, tPsP = 0x%p\n", tidx, tPsP.data()); - // } - Tensor tdSsdS = smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - auto smem_tiled_copy_PdSt = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); - Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); - Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); - - auto smem_tiled_copy_QdOt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); - Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); - Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); - - auto smem_tiled_copy_dS = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); - auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); - Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); - - auto smem_tiled_copy_Kt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); - auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); - Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); - - auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - // - // PREDICATES - // - - Tensor cQ = make_identity_tensor(make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor cKV = make_identity_tensor(make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tQcQ = gmem_thr_copy_QKV.partition_D(cQ); - Tensor tKVcKV = gmem_thr_copy_QKV.partition_D(cKV); - - // Allocate predicate tensors for k - Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); - Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); - - // Set predicates for k bounds - if (!Is_even_K) { - #pragma unroll - for (int k = 0; k < size(tQpQ); ++k) { tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } - #pragma unroll - for (int k = 0; k < size(tKVpKV); ++k) { tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } + // if (cute::thread0()) { print(dP_sum); } + + flash::gemm( + acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, + smem_thr_copy_KV); + + // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, + // MMA_N)) + Tensor dS = make_tensor(acc_dp.data(), scores.layout()); + auto pointwise_mult = [](float p, float dp, float d) { + return p * (!Is_dropout || p >= 0 ? 
dp - d : d); + }; +#pragma unroll + for (int mi = 0; mi < size<0>(dS); ++mi) { +#pragma unroll + for (int ni = 0; ni < size<1>(dS); ++ni) { + dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); + } } - - // Prologue - - // We'll advance gdQ and gdQaccum before the 1st read/write. - tdQgdQ.data() = tdQgdQ.data() + kBlockM * params.dq_row_stride; - tdQgdQaccum.data() = tdQgdQaccum.data() + kBlockM * params.h * params.d_rounded; - - int m_block = m_block_max - 1; - int m_block_min = (!Is_causal && !Is_local) - ? 0 - : std::max(0, (n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k - params.window_size_right) / kBlockM); - // If not local, we're guaranteed that m_block_min <= m_block: - // We checked earlier that n_block * kBlockN < actual_seqlen_k, so in the causal case, - // n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k < actual_seqlen_q. - // So m_block_min <= (actual_seqlen_q - 1) / kBlockM. - // Recall that m_block_max = cute::ceil_div(binfo.actual_seqlen_q, kBlockM) = (actual_seqlen_q + kBlockM - 1) / kBlockM. - // So m_block_m - 1 = (actual_seqlen_q - 1) / kBlockM. - // We conclude that m_block_min <= m_block, so we will always have at least 1 iteration of the for loop. - // However, if local, then this possible to have some blocks of K & V not attending to any query. - // We might need to exit early and write 0 to dK and dV for those blocks. - // Otherwise we get wrong result for the case where we don't enter the for loop. - // And we might read OOB elements from gQ and gdO. - // This also covers the case where actual_seqlen_q == 0 - if ((Is_local || !Is_even_MN) && m_block < m_block_min) { - const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) - + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) - + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - Tensor tdKrdK = make_tensor(shape(tdKgdK)); - Tensor tdVrdV = make_tensor(shape(tdVgdV)); - clear(tdKrdK); - clear(tdVrdV); - Tensor cdKV = make_identity_tensor(make_shape(size<0>(gdK), size<1>(gdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); - Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); - #pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - flash::copy( - gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - return; + // if (cute::thread0()) { print(dS); } + + Tensor acc_dq = partition_fragment_C( + tiled_mma_dq, + Shape, Int>{}); // MMA, MMA_N, MMA_K + tdQgdQaccum.data() = + tdQgdQaccum.data() 
+ (-int(kBlockM * params.h * params.d_rounded)); + if (Is_first || Seq_parallel) { + clear(acc_dq); + } else { + // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum + Tensor acc_dq_reshaped = + make_tensor(acc_dq.data(), make_layout(get<0>(acc_dq.layout()), + get<2>(acc_dq.layout()), + get<1>(acc_dq.layout()))); + cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, acc_dq_reshaped); } - if (Double_buffer && m_block % 2 == 1) { // Double buffer for sQ - tQsQ.data() = tQsQ.data() + size(sQ); - tSsQ.data() = tSsQ.data() + size(sQ); - tdKsQt.data() = tdKsQt.data() + size(sQ); + if (Double_buffer && m_block > m_block_min) { + // Double buffer for sQ + const int sQ_offset = m_block % 2 == 0 ? size(sQ) : -size(sQ); + tQsQ.data() = tQsQ.data() + sQ_offset; + tSsQ.data() = tSsQ.data() + sQ_offset; + // Advance gQ + tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tQgQ, + tQsQ, tQcQ, tQpQ); + flash::cp_async_fence(); } - if ((!Is_first && !Seq_parallel) || params.deterministic) { __syncthreads(); } + Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); + // Convert dS from fp32 to fp16 + Tensor tdSrdS = flash::convert_type(dS_reshaped); + // if (cute::thread0()) { print(tPrP); } + Tensor tdSadS = + smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); + __syncthreads(); - if (Kernel_traits::Is_V_in_regs) { - // Clear the smem tiles to account for predicated off loads - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN - ); + // Layout p_l = tPrP.layout(); + // Tensor tdVrPt = make_tensor(tPrP.data(), make_layout(get<0>(p_l), + // get<2>(p_l), get<1>(p_l))); flash::gemm_A_in_regs(acc_dv, tdVrPt, tdVrdO, + // tdVsdOt, tiled_mma_dkv, smem_thr_copy_QdOt); Tensor tdKrdSt = + // make_tensor(tdSrdS.data(), tdVrPt.layout()); + // flash::gemm_A_in_regs(acc_dk, tdKrdSt, tdKrQt, tdKsQt, tiled_mma_dkv, + // smem_thr_copy_QdOt); + flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, + smem_thr_copy_QdOt); + // if (cute::thread0() && n_block == 0 && m_block == 0) { print(tdVrPt); } + // if (cute::thread0()) { print(acc_dv); } + + __syncthreads(); // Need syncthreads since we're writing to the same sdO + // location + + if (m_block > m_block_min) { + // Advance gdO + tdOgdO.data() = tdOgdO.data() + (-int(kBlockM * params.do_row_stride)); + if (Is_first) { + tdOgO.data() = tdOgO.data() + (-int(kBlockM * params.o_row_stride)); + flash::copy(gmem_tiled_copy_dO, tdOgdO, + tdOrdO, tQcQ, tQpQ); + flash::copy(gmem_tiled_copy_dO, tdOgO, + tdOrO, tQcQ, tQpQ); + } else { + flash::copy(gmem_tiled_copy_dO, tdOgdO, + tdOsdO, tQcQ, tQpQ); flash::cp_async_fence(); + } } - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - if (!Is_first) { - // Clear the smem tiles to account for predicated off loads - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - } else { - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); + flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, + smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, + smem_thr_copy_Kt); + // if (cute::thread0()) { 
print(acc_dq); } + + if (m_block > m_block_min) { + gLSE.data() = gLSE.data() + (-int(kBlockM)); +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + lse(mi) = gLSE(get<0>(taccScS_row(mi))); + } + gdPsum.data() = gdPsum.data() + (-int(kBlockM)); } - flash::copy( - gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - - Tensor caccS = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) - Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) - static_assert(decltype(size<0>(taccScS))::value == 4); - // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. - Tensor taccScS_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); - Tensor lse = make_tensor(Shape>{}); - #pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - const int row = get<0>(taccScS_row(mi)); - lse(mi) = Is_even_MN || row < binfo.actual_seqlen_q - m_block * kBlockM ? gLSE(row) : INFINITY; - } - // We want LSE = inf if the row is OOB. In that case Q would be zero, K would be zero, - // and scores would be zero. With LSE = 0, probs will be all 1's, and when we multiply - // with V (which would be zero), we're fine. However, with ALiBi, we might modify these - // scores, and probs can become NaN. Instead if we set LSE = inf for OOB rows, probs are always 0. - // Tensor tKrK = make_fragment_like(tKsK); - // // cute::copy(gmem_tiled_copy_QKV, tKgK(_, _, _, 0), tKrK); - // cute::copy(gmem_tiled_copy_QKV, tKgK, tKrK); - // // if (cute::thread(1, 0)) { print(tKrK); } - - flash::copy( - gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - if (!Kernel_traits::Is_V_in_regs) { - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN - ); + if (!Is_last) { + // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum + Tensor acc_dq_reshaped = + make_tensor(acc_dq.data(), make_layout(get<0>(acc_dq.layout()), + get<2>(acc_dq.layout()), + get<1>(acc_dq.layout()))); + if (!Seq_parallel) { + cute::copy(gmem_tiled_copy_dQaccum, acc_dq_reshaped, tdQgdQaccum); + } else { + // if (cute::thread0()) { print(acc_dq.layout()); printf("\n"); + // print(acc_dq_reshaped.layout()); printf("\n"); + // print(tdQgdQaccum.layout()); printf("\n"); } + CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); +#pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { + atomicAdd(&tdQgdQaccum(i), acc_dq(i)); + } + } + } else { +#pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { + acc_dq(i) *= params.scale_softmax_rp_dropout; + } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + Tensor taccdQrdQ = + smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); } - flash::cp_async_fence(); - // if (cute::thread0()) { print(tdOgdO.layout()); printf("\n"); print(tdOrdO); print(tdOrO); } - if (Is_first) { - cute::copy(tdOrdO, tdOsdO); - dot_do_o(tdOrdO, tdOrO, gdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); + flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, + smem_thr_copy_QdOt); + // if (cute::thread0()) { print(acc_dk); } + if (Double_buffer) { // Double buffer for sQ + tdKsQt.data() = tdKsQt.data() + (m_block % 2 == 0 ? 
size(sQ) : -size(sQ)); } - - if (Kernel_traits::Is_V_in_regs) { - cute::cp_async_wait<1>(); - __syncthreads(); - Tensor tdPrV_copy_view = smem_thr_copy_KV.retile_D(tdPrV); - CUTE_STATIC_ASSERT_V(size<1>(tdPsV) == size<1>(tdPrV_copy_view)); // M - cute::copy(smem_tiled_copy_KV, tdPsV, tdPrV_copy_view); + if (!Double_buffer && m_block > m_block_min) { + __syncthreads(); + // Advance gQ + tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tQgQ, + tQsQ, tQcQ, tQpQ); + flash::cp_async_fence(); } - auto seed = params.rng_state[0]; - auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; - - clear(acc_dv); - clear(acc_dk); - - float alibi_slope = !Has_alibi ? 0.0f : reinterpret_cast(params.alibi_slopes_ptr)[bidb * params.alibi_slopes_batch_stride + bidh] / params.scale_softmax; - - for (; m_block >= m_block_min; --m_block) { - Tensor acc_s = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - clear(acc_s); - cute::cp_async_wait<0>(); - __syncthreads(); - - Tensor dP_sum = make_fragment_like(lse); - #pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { dP_sum(mi) = gdPsum(get<0>(taccScS_row(mi))); } - - // if (cute::thread0()) { print(sK); } - // Tensor tSrK_copy_view = smem_thr_copy_KV.retile_D(tSrK); - // #pragma unroll - // for (int k = 0; k < size<2>(tSrK_copy_view); ++k) { - // cute::copy(smem_tiled_copy_KV, tSsK(_, _, k), tSrK_copy_view(_, _, k)); - // } - // if (cute::thread0()) { print(tSrK); } - flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); - - // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) - Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - // if (cute::thread(32, 0)) { print(scores); } - - if (Has_alibi) { - flash::apply_alibi( - scores, - n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, - m_block * kBlockM + get<0>(taccScS_row(0)), - binfo.actual_seqlen_q, - AtomLayoutMS * 16, - alibi_slope - ); - } - - // TD [2023-07-29]: I was thinking that we don't need to mask out the elements beyond - // actual_seqlen_k, because acc_s would be some finite value for those indices. - // In the end when we multiply with K to get dQ, the corresponding values of K would be 0, - // so the result would still be correct. - // However, it's possible that the values in acc_s are so large that they overflow - // when we multiply with dP and convert to fp16, resulting in Inf in dS and NaNs in dQ. - // So we need to mask out the elements beyond actual_seqlen_k. - if (!Is_causal && !Is_local) { - if (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k) { - flash::apply_mask(scores, binfo.actual_seqlen_k, - n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16); - } - } else if (Is_causal) { - // Putting this causal masking right after acc_s is *much* slower for some reason. - // TD [2023-08-16]: We need the 2nd condition because if seqlen_q is long and seqlen_k is short - // (e.g., 256 and 2), the 2nd block of seqlen_q (from 128 to 255), we're not doing causal masking. - // But we still want to mask out elements beyond actual_seqlen_k. 
- if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k - || (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { - flash::apply_mask_causal(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), - binfo.actual_seqlen_q, - // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % AtomLayoutMS * 16 + (tidx % 32) / 4, - AtomLayoutMS * 16); - } - } else if (Is_local) { - if (m_block * kBlockM < (n_block + 1) * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k - params.window_size_right - || (m_block + 1) * kBlockM >= n_block * kBlockN + binfo.actual_seqlen_q - binfo.actual_seqlen_k + params.window_size_left - || (!Is_even_MN && (n_block + 1) * kBlockN >= binfo.actual_seqlen_k)) { - flash::apply_mask_local(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), - binfo.actual_seqlen_q, AtomLayoutMS * 16, - params.window_size_left, params.window_size_right); - } - - } - - // if (cute::thread(32, 0)) { print(scores); } - // Compute the exponential value. - flash::scale_apply_exp2(scores, lse, params.scale_softmax_log2); - if (Is_dropout) { - int warp_id = tidx / 32; - int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; - // Need col to be multiples of 32, since we're doing dropout with block of 16 x 32 - static_assert(MMA_N_SdP % 2 == 0); - int block_col_idx = n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); - Tensor scores_dropped = make_tensor(scores.data(), flash::convert_layout_rowcol_Aregs(scores.layout())); - flash::apply_dropout( - scores_dropped, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, AtomLayoutMS - ); - } - // Convert scores from fp32 to fp16/bf16 - Tensor rP = !Is_dropout - ? flash::convert_type(scores) - : flash::convert_type_relu(scores); - // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, MMA_N / 2) - // if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using m16n8k8. 
- Tensor tPrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs(rP.layout())); - Tensor tPaP = smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); - // if (cute::thread0()) { print(tPaP); } - // __syncthreads(); - // if (cute::thread0()) { print(sP); } - - Tensor acc_dp = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA - - clear(acc_dp); - // Tensor acc_dp_reshaped = make_tensor(acc_dp.data(), flash::convert_layout_acc_rowcol(acc_dp.layout())); - // #pragma unroll - // for (int mi = 0; mi < size<0>(acc_dp_reshaped); ++mi) { - // #pragma unroll - // for (int ni = 0; ni < size<1>(acc_dp_reshaped); ++ni) { - // acc_dp_reshaped(mi, ni) = -dP_sum(mi); - // } - // } - - // if (cute::thread0()) { print(dP_sum); } - - flash::gemm( - acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV - ); - - // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) - Tensor dS = make_tensor(acc_dp.data(), scores.layout()); - auto pointwise_mult = [](float p, float dp, float d) { - return p * (!Is_dropout || p >= 0 ? dp - d : d); - }; - #pragma unroll - for (int mi = 0; mi < size<0>(dS); ++mi) { - #pragma unroll - for (int ni = 0; ni < size<1>(dS); ++ni) { - dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); - } - } - // if (cute::thread0()) { print(dS); } - - Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_N, MMA_K - tdQgdQaccum.data() = tdQgdQaccum.data() + (-int(kBlockM * params.h * params.d_rounded)); - if (Is_first || Seq_parallel) { - clear(acc_dq); - } else { - // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum - Tensor acc_dq_reshaped = make_tensor(acc_dq.data(), - make_layout(get<0>(acc_dq.layout()), - get<2>(acc_dq.layout()), - get<1>(acc_dq.layout()))); - cute::copy(gmem_tiled_copy_dQaccum, tdQgdQaccum, acc_dq_reshaped); - } - - if (Double_buffer && m_block > m_block_min) { - // Double buffer for sQ - const int sQ_offset = m_block % 2 == 0 ? 
size(sQ) : -size(sQ); - tQsQ.data() = tQsQ.data() + sQ_offset; - tSsQ.data() = tSsQ.data() + sQ_offset; - // Advance gQ - tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ); - flash::cp_async_fence(); - } - - Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); - // Convert dS from fp32 to fp16 - Tensor tdSrdS = flash::convert_type(dS_reshaped); - // if (cute::thread0()) { print(tPrP); } - Tensor tdSadS = smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); - __syncthreads(); - - // Layout p_l = tPrP.layout(); - // Tensor tdVrPt = make_tensor(tPrP.data(), make_layout(get<0>(p_l), get<2>(p_l), get<1>(p_l))); - // flash::gemm_A_in_regs(acc_dv, tdVrPt, tdVrdO, tdVsdOt, tiled_mma_dkv, smem_thr_copy_QdOt); - // Tensor tdKrdSt = make_tensor(tdSrdS.data(), tdVrPt.layout()); - // flash::gemm_A_in_regs(acc_dk, tdKrdSt, tdKrQt, tdKsQt, tiled_mma_dkv, smem_thr_copy_QdOt); - flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); - // if (cute::thread0() && n_block == 0 && m_block == 0) { print(tdVrPt); } - // if (cute::thread0()) { print(acc_dv); } - - __syncthreads(); // Need syncthreads since we're writing to the same sdO location - - if (m_block > m_block_min) { - // Advance gdO - tdOgdO.data() = tdOgdO.data() + (-int(kBlockM * params.do_row_stride)); - if (Is_first) { - tdOgO.data() = tdOgO.data() + (-int(kBlockM * params.o_row_stride)); - flash::copy(gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ); - flash::copy(gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ); - } else { - flash::copy(gmem_tiled_copy_dO, tdOgdO, tdOsdO, tQcQ, tQpQ); - flash::cp_async_fence(); - } - } - - flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, - smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, smem_thr_copy_Kt); - // if (cute::thread0()) { print(acc_dq); } - - if (m_block > m_block_min) { - gLSE.data() = gLSE.data() + (-int(kBlockM)); - #pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { lse(mi) = gLSE(get<0>(taccScS_row(mi))); } - gdPsum.data() = gdPsum.data() + (-int(kBlockM)); - } - - if (!Is_last) { - // Reshape acc_dq from (4, 1, 2) to (4, 2, 1) to write to gdQaccum - Tensor acc_dq_reshaped = make_tensor(acc_dq.data(), - make_layout(get<0>(acc_dq.layout()), - get<2>(acc_dq.layout()), - get<1>(acc_dq.layout()))); - if (!Seq_parallel) { - cute::copy(gmem_tiled_copy_dQaccum, acc_dq_reshaped, tdQgdQaccum); - } else { - // if (cute::thread0()) { print(acc_dq.layout()); printf("\n"); print(acc_dq_reshaped.layout()); printf("\n"); print(tdQgdQaccum.layout()); printf("\n"); } - CUTE_STATIC_ASSERT_V(size(acc_dq) == size(tdQgdQaccum)); - #pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { atomicAdd(&tdQgdQaccum(i), acc_dq(i)); } - } - } else { - #pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - } - - flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); - // if (cute::thread0()) { print(acc_dk); } - if (Double_buffer) { // Double buffer for sQ - tdKsQt.data() = 
tdKsQt.data() + (m_block % 2 == 0 ? size(sQ) : -size(sQ)); - } - if (!Double_buffer && m_block > m_block_min) { - __syncthreads(); - // Advance gQ - tQgQ.data() = tQgQ.data() + (-int(kBlockM * params.q_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ); - flash::cp_async_fence(); - } - - if (Is_first && m_block > m_block_min) { - cute::copy(tdOrdO, tdOsdO); - dot_do_o(tdOrdO, tdOrO, gdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout); - } + if (Is_first && m_block > m_block_min) { + cute::copy(tdOrdO, tdOsdO); + dot_do_o( + tdOrdO, tdOrO, gdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), + params.p_dropout); + } - if (Is_last) { - __syncthreads(); - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - tdQgdQ.data() = tdQgdQ.data() + (-int(kBlockM * params.dq_row_stride)); - Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); - #pragma unroll - for (int m = 0; m < size<1>(tdQgdQ); ++m) { - if (Is_even_MN || get<0>(tdQcdQ(0, m, 0)) < binfo.actual_seqlen_q - m_block * kBlockM) { - cute::copy(gmem_tiled_copy_dQ, tdQrdQ(_, m, _), tdQgdQ(_, m, _)); - } - } + if (Is_last) { + __syncthreads(); + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + tdQgdQ.data() = tdQgdQ.data() + (-int(kBlockM * params.dq_row_stride)); + Tensor cdQ = make_identity_tensor( + Shape, + Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); +#pragma unroll + for (int m = 0; m < size<1>(tdQgdQ); ++m) { + if (Is_even_MN || get<0>(tdQcdQ(0, m, 0)) < + binfo.actual_seqlen_q - m_block * kBlockM) { + cute::copy(gmem_tiled_copy_dQ, tdQrdQ(_, m, _), tdQgdQ(_, m, _)); } - + } } + } - // Epilogue + // Epilogue - if (Is_dropout) { - #pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { acc_dv(i) *= params.rp_dropout; } + if (Is_dropout) { +#pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { + acc_dv(i) *= params.rp_dropout; } - #pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { acc_dk(i) *= params.scale_softmax_rp_dropout; } - - // Convert acc_dv from fp32 to fp16 - Tensor rdK = flash::convert_type(acc_dk); - Tensor rdV = flash::convert_type(acc_dv); - - Tensor sdK = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - Tensor sdV = make_tensor(sdK.data() + size(sdK), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) - - // Partition sdV and sdK to match the accumulator partitioning - auto smem_tiled_copy_dKV = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); - auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor taccdKrdK = smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdKsdK = smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor taccdVrdV = smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdVsdV = smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - // We need syncthreads here since we're writing to the same location as sK and sV. - // Without syncthreads, some thread might modify the location of sK while another thread - // is reading it for dQ gemm, leading to a race condition. - // If Is_last, there's already a __syncthreads() at the end of the loop. 
- if (!Is_last) { __syncthreads(); } - - cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); - cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); - - const index_t row_offset_dk = binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) - + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; - const index_t row_offset_dv = binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) - + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; - Tensor gdK = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), - Shape, Int>{}, - make_stride(params.dk_row_stride, _1{})); - Tensor gdV = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), - Shape, Int>{}, - make_stride(params.dv_row_stride, _1{})); - - typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; - auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); - Tensor tdKsdK = gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); - Tensor tdVsdV = gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); - + } +#pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { + acc_dk(i) *= params.scale_softmax_rp_dropout; + } + + // Convert acc_dv from fp32 to fp16 + Tensor rdK = flash::convert_type(acc_dk); + Tensor rdV = flash::convert_type(acc_dv); + + Tensor sdK = make_tensor( + sK.data(), typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + Tensor sdV = + make_tensor(sdK.data() + size(sdK), + typename Kernel_traits::SmemLayoutdKV{}); // (SMEM_N, SMEM_K) + + // Partition sdV and sdK to match the accumulator partitioning + auto smem_tiled_copy_dKV = make_tiled_copy_C( + typename Kernel_traits::SmemCopyAtomdKV{}, tiled_mma_dkv); + auto smem_thr_copy_dKV = smem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor taccdKrdK = + smem_thr_copy_dKV.retile_S(rdK); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdKsdK = + smem_thr_copy_dKV.partition_D(sdK); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor taccdVrdV = + smem_thr_copy_dKV.retile_S(rdV); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdVsdV = + smem_thr_copy_dKV.partition_D(sdV); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + // We need syncthreads here since we're writing to the same location as sK and + // sV. Without syncthreads, some thread might modify the location of sK while + // another thread is reading it for dQ gemm, leading to a race condition. If + // Is_last, there's already a __syncthreads() at the end of the loop. 
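The race described in the comment above, guarded by the `if (!Is_last)` branch just below, is the standard shared-memory aliasing hazard: sdK/sdV reuse the storage of sK/sV, so all reads of sK for the dQ gemm must finish before any thread overwrites it. A toy CUDA illustration of the pattern (hypothetical kernel, assumes blockDim.x == 256; not the cute code path):

    __global__ void reuse_smem(const float *in, float *out) {
      __shared__ float buf[256];
      int t = threadIdx.x;
      buf[t] = in[t];           // phase 1: buf plays the role of sK
      __syncthreads();
      float x = buf[255 - t];   // threads read each other's slots
      __syncthreads();          // all reads must finish before reuse
      buf[t] = 2.0f * x;        // phase 2: buf reused as sdK
      __syncthreads();
      out[t] = buf[t];
    }

Without the middle barrier a fast thread could overwrite buf[255 - t] while a slower one is still reading it.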
+ if (!Is_last) { __syncthreads(); - Tensor tdKrdK = make_tensor(shape(tdKgdK)); - cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); - Tensor tdVrdV = make_tensor(shape(tdVgdV)); - cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); - Tensor cdKV = make_identity_tensor(make_shape(size<0>(sdK), size<1>(sdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); - Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); - #pragma unroll - for (int k = 0; k < size(tdKVpdKV); ++k) { tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dKV, tdKrdK, tdKgdK, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - flash::copy( - gmem_tiled_copy_dKV, tdVrdV, tdVgdV, tdKVcdKV, tdKVpdKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - + } + + cute::copy(smem_tiled_copy_dKV, taccdKrdK, taccdKsdK); + cute::copy(smem_tiled_copy_dKV, taccdVrdV, taccdVsdV); + + const index_t row_offset_dk = + binfo.k_offset(params.dk_batch_stride, params.dk_row_stride, bidb) + + n_block * kBlockN * params.dk_row_stride + bidh * params.dk_head_stride; + const index_t row_offset_dv = + binfo.k_offset(params.dv_batch_stride, params.dv_row_stride, bidb) + + n_block * kBlockN * params.dv_row_stride + bidh * params.dv_head_stride; + Tensor gdK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dk_ptr) + row_offset_dk), + Shape, Int>{}, + make_stride(params.dk_row_stride, _1{})); + Tensor gdV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dv_ptr) + row_offset_dv), + Shape, Int>{}, + make_stride(params.dv_row_stride, _1{})); + + typename Kernel_traits::GmemTiledCopydKV gmem_tiled_copy_dKV; + auto gmem_thr_copy_dKV = gmem_tiled_copy_dKV.get_thread_slice(tidx); + Tensor tdKsdK = + gmem_thr_copy_dKV.partition_S(sdK); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdKgdK = gmem_thr_copy_dKV.partition_D(gdK); + Tensor tdVsdV = + gmem_thr_copy_dKV.partition_S(sdV); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdVgdV = gmem_thr_copy_dKV.partition_D(gdV); + + __syncthreads(); + Tensor tdKrdK = make_tensor(shape(tdKgdK)); + cute::copy(gmem_tiled_copy_dKV, tdKsdK, tdKrdK); + Tensor tdVrdV = make_tensor(shape(tdVgdV)); + cute::copy(gmem_tiled_copy_dKV, tdVsdV, tdVrdV); + Tensor cdKV = make_identity_tensor( + make_shape(size<0>(sdK), size<1>(sdK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + Tensor tdKVcdKV = gmem_thr_copy_dKV.partition_D(cdKV); + Tensor tdKVpdKV = make_tensor(make_shape(size<2>(tdKgdK))); +#pragma unroll + for (int k = 0; k < size(tdKVpdKV); ++k) { + tdKVpdKV(k) = get<1>(tdKVcdKV(0, 0, k)) < params.d; + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_dKV, tdKrdK, tdKgdK, + tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); + flash::copy(gmem_tiled_copy_dKV, tdVrdV, tdVgdV, + tdKVcdKV, tdKVpdKV, + binfo.actual_seqlen_k - n_block * kBlockN); } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void compute_dq_dk_dv_1rowblock(const Params ¶ms, const int bidb, const int bidh, const int m_block) { - - using Element = typename Kernel_traits::Element; - using ElementAccum = typename Kernel_traits::ElementAccum; - using index_t = typename Kernel_traits::index_t; - - // Shared memory. - extern __shared__ char smem_[]; - - // The thread index. 
- const int tidx = threadIdx.x; - - constexpr int kBlockM = Kernel_traits::kBlockM; - constexpr int kBlockN = Kernel_traits::kBlockN; - constexpr int kHeadDim = Kernel_traits::kHeadDim; - // constexpr int kNWarps = Kernel_traits::kNWarps; - constexpr int MMA_N_SdP = kBlockN / decltype(size<1>(typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; - constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; - - const BlockInfo binfo(params, bidb); - if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) return; - - int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); - if (Is_causal) { - n_block_max = std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM, kBlockN)); +template +inline __device__ void +compute_dq_dk_dv_1rowblock(const Params ¶ms, const int bidb, const int bidh, + const int m_block) { + + using Element = typename Kernel_traits::Element; + using ElementAccum = typename Kernel_traits::ElementAccum; + using index_t = typename Kernel_traits::index_t; + + // Shared memory. + extern __shared__ char smem_[]; + + // The thread index. + const int tidx = threadIdx.x; + + constexpr int kBlockM = Kernel_traits::kBlockM; + constexpr int kBlockN = Kernel_traits::kBlockN; + constexpr int kHeadDim = Kernel_traits::kHeadDim; + // constexpr int kNWarps = Kernel_traits::kNWarps; + constexpr int MMA_N_SdP = + kBlockN / + decltype(size<1>( + typename Kernel_traits::TiledMmaSdP::TiledShape_MNK{}))::value; + constexpr int AtomLayoutMS = Kernel_traits::AtomLayoutMSdP; + + const BlockInfo binfo(params, bidb); + if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) + return; + + int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN); + if (Is_causal) { + n_block_max = + std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM, kBlockN)); + } + + // We iterate over the blocks in reverse order. This is because the last block + // is the only one that needs masking when we read K and V from global memory. + // Moreover, iterating in reverse might save us 1 register (we just need + // n_block instead of both n_block and n_block_max). + + const index_t row_offset_q = + binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; + // We move K and V to the last block. + const index_t row_offset_k = + binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.k_row_stride + + (bidh / params.h_h_k_ratio) * params.k_head_stride; + const index_t row_offset_v = + binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + + (n_block_max - 1) * kBlockN * params.v_row_stride + + (bidh / params.h_h_k_ratio) * params.v_head_stride; + const index_t row_offset_do = + binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) + + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; + const index_t row_offset_o = + binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; + // We'll advance gdKaccum and gdVaccum before the first write. 
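Every row_offset_* above (and row_offset_dkv_accum just below) follows the same batch/tile/head decomposition. A hedged standalone helper showing the arithmetic (strides are in elements, mirroring params.*_row_stride and friends; tile_offset is an invented name):

    #include <cstdint>

    // Offset of the first element of tile `block` for (batch bidb, head bidh).
    int64_t tile_offset(int64_t batch_stride, int64_t row_stride,
                        int64_t head_stride, int bidb, int bidh,
                        int block, int block_rows) {
      return int64_t(bidb) * batch_stride               // this batch
           + int64_t(block) * block_rows * row_stride   // this tile's rows
           + int64_t(bidh) * head_stride;               // this head
    }

For K and V the head index is first divided by params.h_h_k_ratio, which is how several query heads share one KV head under MQA/GQA.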
+ const index_t row_offset_dkv_accum = + ((bidb * params.h_k + (bidh / params.h_h_k_ratio)) * + params.seqlen_k_rounded + + n_block_max * kBlockN) * + params.d_rounded; + const index_t row_offset_lse = + (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; + + // We assume that params.d == kHeadDim for now + Tensor gQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), + Shape, Int>{}, + make_stride(params.q_row_stride, _1{})); + Tensor gK = make_tensor( + make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), + Shape, Int>{}, + make_stride(params.k_row_stride, _1{})); + Tensor gV = make_tensor( + make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), + Shape, Int>{}, + make_stride(params.v_row_stride, _1{})); + Tensor gdO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), + Shape, Int>{}, + make_stride(params.do_row_stride, _1{})); + Tensor gO = make_tensor( + make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), + Shape, Int>{}, + make_stride(params.o_row_stride, _1{})); + Tensor gdKaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + Tensor gdVaccum = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + + row_offset_dkv_accum), + Shape, Int>{}, Stride, _1>{}); + Tensor gLSE = make_tensor( + make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + + row_offset_lse), + Shape>{}, Stride<_1>{}); + + Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sQt = + make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sQtNoSwizzle = make_tensor( + sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sdO = make_tensor(sQ.data() + size(sQ), + typename Kernel_traits::SmemLayoutQdO{}); + Tensor sdOt = make_tensor(sdO.data(), + typename Kernel_traits::SmemLayoutQdOtransposed{}); + Tensor sdOtransposedNoSwizzle = make_tensor( + sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); + Tensor sK = make_tensor(sdO.data() + size(sdO), + typename Kernel_traits::SmemLayoutKV{}); + // Double buffer for sK + Tensor sV = make_tensor(sK.data() + 2 * size(sK), + typename Kernel_traits::SmemLayoutKV{}); + Tensor sKt = + make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); + Tensor sKtNoSwizzle = make_tensor( + sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); + Tensor sdS = make_tensor(sV.data() + size(sV), + typename Kernel_traits::SmemLayoutPdS{}); + Tensor sdSt = make_tensor(sdS.data(), + typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sdStNoSwizzle = make_tensor( + sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sP = make_tensor(sdS.data() + size(sdS), + typename Kernel_traits::SmemLayoutPdS{}); + Tensor sPt = + make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); + Tensor sPtNoSwizzle = make_tensor( + sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); + Tensor sdPsum = make_tensor( + make_smem_ptr(reinterpret_cast(sdS.data().get())), + Shape>{}); + + typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; + auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); + typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; + auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); + typename 
Kernel_traits::GmemTiledCopydQaccumAtomicAdd + gmem_tiled_copy_dKVaccum; + auto gmem_thr_copy_dKVaccum = gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); + + Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); + Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); + Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); + Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); + Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); + Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) + Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); + Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) + Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); + Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); + Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); + + typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; + auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); + Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) + Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) + Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) + Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) + + typename Kernel_traits::TiledMmadKV tiled_mma_dkv; + auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); + Tensor tdKrdSt = + thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdKrQt = + thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) + Tensor tdVrPt = + thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) + Tensor tdVrdO = thr_mma_dkv.partition_fragment_B( + sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) + + typename Kernel_traits::TiledMmadQ tiled_mma_dq; + auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); + Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) + Tensor tdQrKt = + thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) + + Tensor acc_dq = partition_fragment_C( + tiled_mma_dq, + Shape, Int>{}); // MMA, MMA_M_SdP, MMA_K + + // + // Copy Atom retiling + // + + auto smem_tiled_copy_QdO = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); + Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); + Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); + + auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN( + typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); + auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); + Tensor tSsK = smem_thr_copy_KV.partition_S(sK); + Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); + + // Partition sP and sdS to match the accumulator partitioning + // This has to be tiled_mma_sdp, not tiled_mma_dkv + auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN( + typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); + auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); + Tensor tPsP = + smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) + Tensor tdSsdS = + smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + auto smem_tiled_copy_PdSt = make_tiled_copy_A( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); + Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); + Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); + + auto smem_tiled_copy_QdOt = 
make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); + auto smem_thr_copy_QdOt = smem_tiled_copy_QdOt.get_thread_slice(tidx); + Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); + Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); + + auto smem_tiled_copy_dS = + make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); + auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); + Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); + + auto smem_tiled_copy_Kt = make_tiled_copy_B( + typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); + auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); + Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); + + // + // PREDICATES + // + + // Construct identity layout for sQ and sK + Tensor cQ = make_identity_tensor( + make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor cKV = make_identity_tensor( + make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) + // Repeat the partitioning with identity layouts + Tensor tQcQ = gmem_thr_copy_QKV.partition_S( + cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) + Tensor tKVcKV = gmem_thr_copy_QKV.partition_S( + cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) + + // Allocate predicate tensors for k + Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); + Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); + + // Set predicates for k bounds + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tQpQ); ++k) { + tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } - - // We iterate over the blocks in reverse order. This is because the last block is the only one - // that needs masking when we read K and V from global memory. Moreover, iterating in reverse - // might save us 1 register (we just need n_block instead of both n_block and n_block_max). - - const index_t row_offset_q = binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) - + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride; - // We move K and V to the last block. - const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) - + (n_block_max - 1) * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride; - const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) - + (n_block_max - 1) * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride; - const index_t row_offset_do = binfo.q_offset(params.do_batch_stride, params.do_row_stride, bidb) - + m_block * kBlockM * params.do_row_stride + bidh * params.do_head_stride; - const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) - + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride; - // We'll advance gdKaccum and gdVaccum before the first write. 
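The tQpQ/tKVpKV predicates built above handle the case where the real head dim params.d is smaller than the padded kHeadDim tile. A plain-CUDA sketch of a predicate-guarded copy (hypothetical shapes, assumes blockDim.x == d_padded; the real code routes this through flash::copy with its Clear_OOB flags):

    __global__ void predicated_tile_copy(const float *src, float *dst,
                                         int rows, int d, int d_padded) {
      int col = threadIdx.x;   // one thread per padded column
      bool pred = col < d;     // same role as tQpQ(k)
      for (int r = 0; r < rows; ++r) {
        // Out-of-range columns are cleared instead of read, so later
        // gemms consume zeros rather than garbage.
        dst[r * d_padded + col] = pred ? src[r * d + col] : 0.0f;
      }
    }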
- const index_t row_offset_dkv_accum = ((bidb * params.h_k + (bidh / params.h_h_k_ratio)) * params.seqlen_k_rounded - + n_block_max * kBlockN) * params.d_rounded; - const index_t row_offset_lse = (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM; - - // We assume that params.d == kHeadDim for now - Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.q_ptr) + row_offset_q), - Shape, Int>{}, - make_stride(params.q_row_stride, _1{})); - Tensor gK = make_tensor(make_gmem_ptr(reinterpret_cast(params.k_ptr) + row_offset_k), - Shape, Int>{}, - make_stride(params.k_row_stride, _1{})); - Tensor gV = make_tensor(make_gmem_ptr(reinterpret_cast(params.v_ptr) + row_offset_v), - Shape, Int>{}, - make_stride(params.v_row_stride, _1{})); - Tensor gdO = make_tensor(make_gmem_ptr(reinterpret_cast(params.do_ptr) + row_offset_do), - Shape, Int>{}, - make_stride(params.do_row_stride, _1{})); - Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast(params.o_ptr) + row_offset_o), - Shape, Int>{}, - make_stride(params.o_row_stride, _1{})); - Tensor gdKaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dk_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, - Stride, _1>{}); - Tensor gdVaccum = make_tensor(make_gmem_ptr(reinterpret_cast(params.dv_accum_ptr) + row_offset_dkv_accum), - Shape, Int>{}, - Stride, _1>{}); - Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast(params.softmax_lse_ptr) + row_offset_lse), - Shape>{}, Stride<_1>{}); - - Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast(smem_)), - typename Kernel_traits::SmemLayoutQdO{}); - Tensor sQt = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sQtNoSwizzle = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sdO = make_tensor(sQ.data() + size(sQ), typename Kernel_traits::SmemLayoutQdO{}); - Tensor sdOt = make_tensor(sdO.data(), typename Kernel_traits::SmemLayoutQdOtransposed{}); - Tensor sdOtransposedNoSwizzle = make_tensor(sdO.data(), - typename Kernel_traits::SmemLayoutQdOtransposedNoSwizzle{}); - Tensor sK = make_tensor(sdO.data() + size(sdO), typename Kernel_traits::SmemLayoutKV{}); - // Double buffer for sK - Tensor sV = make_tensor(sK.data() + 2 * size(sK), typename Kernel_traits::SmemLayoutKV{}); - Tensor sKt = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposed{}); - Tensor sKtNoSwizzle = make_tensor(sK.data(), typename Kernel_traits::SmemLayoutKtransposedNoSwizzle{}); - Tensor sdS = make_tensor(sV.data() + size(sV), typename Kernel_traits::SmemLayoutPdS{}); - Tensor sdSt = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sdStNoSwizzle = make_tensor(sdS.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sP = make_tensor(sdS.data() + size(sdS), typename Kernel_traits::SmemLayoutPdS{}); - Tensor sPt = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposed{}); - Tensor sPtNoSwizzle = make_tensor(sP.data(), typename Kernel_traits::SmemLayoutPdStransposedNoSwizzle{}); - Tensor sdPsum = make_tensor(make_smem_ptr(reinterpret_cast(sdS.data().get())), - Shape>{}); - - typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV; - auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydO gmem_tiled_copy_dO; - auto gmem_thr_copy_dO = gmem_tiled_copy_dO.get_thread_slice(tidx); - typename Kernel_traits::GmemTiledCopydQaccumAtomicAdd gmem_tiled_copy_dKVaccum; - auto gmem_thr_copy_dKVaccum = 
gmem_tiled_copy_dKVaccum.get_thread_slice(tidx); - - Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ); - Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ); - Tensor tdOgdO = gmem_thr_copy_dO.partition_S(gdO); - Tensor tdOsdO = gmem_thr_copy_dO.partition_D(sdO); - Tensor tdOgO = gmem_thr_copy_dO.partition_S(gO); - Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK); // (KCPY, KCPY_N, KCPY_K) - Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK); - Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV); // (VCPY, VCPY_N, VCPY_K) - Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV); - Tensor tdKgdKaccum = gmem_thr_copy_dKVaccum.partition_D(gdKaccum); - Tensor tdVgdVaccum = gmem_thr_copy_dKVaccum.partition_D(gdVaccum); - - typename Kernel_traits::TiledMmaSdP tiled_mma_sdp; - auto thr_mma_sdp = tiled_mma_sdp.get_thread_slice(tidx); - Tensor tSrQ = thr_mma_sdp.partition_fragment_A(sQ); // (MMA,MMA_N,MMA_K) - Tensor tSrK = thr_mma_sdp.partition_fragment_B(sK); // (MMA,MMA_N,MMA_K) - Tensor tdPrdO = thr_mma_sdp.partition_fragment_A(sdO); // (MMA,MMA_N,MMA_K) - Tensor tdPrV = thr_mma_sdp.partition_fragment_B(sV); // (MMA,MMA_N,MMA_K) - - typename Kernel_traits::TiledMmadKV tiled_mma_dkv; - auto thr_mma_dkv = tiled_mma_dkv.get_thread_slice(tidx); - Tensor tdKrdSt = thr_mma_dkv.partition_fragment_A(sdStNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdKrQt = thr_mma_dkv.partition_fragment_B(sQtNoSwizzle); // (MMA, MMA_K, MMA_N) - Tensor tdVrPt = thr_mma_dkv.partition_fragment_A(sPtNoSwizzle); // (MMA, MMA_N, MMA_N) - Tensor tdVrdO = thr_mma_dkv.partition_fragment_B(sdOtransposedNoSwizzle); // (MMA, MMA_K, MMA_N) - - typename Kernel_traits::TiledMmadQ tiled_mma_dq; - auto thr_mma_dq = tiled_mma_dq.get_thread_slice(tidx); - Tensor tdQrdS = thr_mma_dq.partition_fragment_A(sdS); // (MMA, MMA_N, MMA_N) - Tensor tdQrKt = thr_mma_dq.partition_fragment_B(sKtNoSwizzle); // (MMA, MMA_K, MMA_N) - - Tensor acc_dq = partition_fragment_C(tiled_mma_dq, Shape, Int>{}); // MMA, MMA_M_SdP, MMA_K - - // - // Copy Atom retiling - // - - auto smem_tiled_copy_QdO = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_QdO = smem_tiled_copy_QdO.get_thread_slice(tidx); - Tensor tSsQ = smem_thr_copy_QdO.partition_S(sQ); - Tensor tdPsdO = smem_thr_copy_QdO.partition_S(sdO); - - auto smem_tiled_copy_KV = make_tiled_copy_B_warpcontiguousN(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_sdp); - auto smem_thr_copy_KV = smem_tiled_copy_KV.get_thread_slice(tidx); - Tensor tSsK = smem_thr_copy_KV.partition_S(sK); - Tensor tdPsV = smem_thr_copy_KV.partition_S(sV); - - // Partition sP and sdS to match the accumulator partitioning - // This has to be tiled_mma_sdp, not tiled_mma_dkv - auto smem_tiled_copy_PdS = make_tiled_copy_C_warpcontiguousN(typename Kernel_traits::SmemCopyAtomPdS{}, tiled_mma_sdp); - auto smem_thr_copy_PdS = smem_tiled_copy_PdS.get_thread_slice(tidx); - Tensor tPsP = smem_thr_copy_PdS.partition_D(sP); // ((Atom,AtomNum),PIPE_M,PIPE_N) - Tensor tdSsdS = smem_thr_copy_PdS.partition_D(sdS); // ((Atom,AtomNum),PIPE_M,PIPE_N) - - auto smem_tiled_copy_PdSt = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_PdSt = smem_tiled_copy_PdSt.get_thread_slice(tidx); - Tensor tdVsPt = smem_thr_copy_PdSt.partition_S(sPt); - Tensor tdKsdSt = smem_thr_copy_PdSt.partition_S(sdSt); - - auto smem_tiled_copy_QdOt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dkv); - auto smem_thr_copy_QdOt = 
smem_tiled_copy_QdOt.get_thread_slice(tidx); - Tensor tdVsdOt = smem_thr_copy_QdOt.partition_S(sdOt); - Tensor tdKsQt = smem_thr_copy_QdOt.partition_S(sQt); - - auto smem_tiled_copy_dS = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma_dq); - auto smem_thr_copy_dS = smem_tiled_copy_dS.get_thread_slice(tidx); - Tensor tdQsdS = smem_thr_copy_dS.partition_S(sdS); - - auto smem_tiled_copy_Kt = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma_dq); - auto smem_thr_copy_Kt = smem_tiled_copy_Kt.get_thread_slice(tidx); - Tensor tdQsKt = smem_thr_copy_Kt.partition_S(sKt); - - // - // PREDICATES - // - - // Construct identity layout for sQ and sK - Tensor cQ = make_identity_tensor(make_shape(size<0>(sQ), size<1>(sQ))); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor cKV = make_identity_tensor(make_shape(size<0>(sK), size<1>(sK))); // (BLK_N,BLK_K) -> (blk_n,blk_k) - // Repeat the partitioning with identity layouts - Tensor tQcQ = gmem_thr_copy_QKV.partition_S(cQ); // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k) - Tensor tKVcKV = gmem_thr_copy_QKV.partition_S(cKV); // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k) - - // Allocate predicate tensors for k - Tensor tQpQ = make_tensor(make_shape(size<2>(tQsQ))); - Tensor tKVpKV = make_tensor(make_shape(size<2>(tKsK))); - - // Set predicates for k bounds - if (!Is_even_K) { - #pragma unroll - for (int k = 0; k < size(tQpQ); ++k) { tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d; } - #pragma unroll - for (int k = 0; k < size(tKVpKV); ++k) { tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } +#pragma unroll + for (int k = 0; k < size(tKVpKV); ++k) { + tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d; } + } + + // Prologue + + Tensor tdOrdO = make_fragment_like(tdOgdO); + Tensor tdOrO = make_fragment_like(tdOgO); + + // TODO: Might need to exit early and write 0 to gdQ. + + flash::copy( + gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + flash::copy( + gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + + Tensor tQrQ = make_fragment_like(tQgQ); + flash::copy( + gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, + binfo.actual_seqlen_q - m_block * kBlockM); + + int n_block = n_block_max - 1; + if (n_block % 2 == 1) { + tKsK.data() = tKsK.data() + size(sK); + tSsK.data() = tSsK.data() + size(sK); + tdQsKt.data() = tdQsKt.data() + size(sK); + } + + flash::copy( + gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + flash::copy( + gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, + binfo.actual_seqlen_k - n_block * kBlockN); + + Tensor caccS = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) + Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) + static_assert(decltype(size<0>(taccScS))::value == 4); + // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. + Tensor taccScS_row = + logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); + Tensor lse = make_tensor( + Shape>{}); +#pragma unroll + for (int mi = 0; mi < size(lse); ++mi) { + const int row = get<0>(taccScS_row(mi)); + lse(mi) = row < binfo.actual_seqlen_q - m_block * kBlockM ? 
gLSE(row) : 0; + } + + cute::cp_async_fence(); + + Tensor dP_sum = make_fragment_like(lse); + cute::copy(tdOrdO, tdOsdO); + dot_do_o( + tdOrdO, tdOrO, sdPsum, + Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), + params.p_dropout); + __syncthreads(); +#pragma unroll + for (int mi = 0; mi < size(dP_sum); ++mi) { + dP_sum(mi) = sdPsum(get<0>(taccScS_row(mi))); + } + + auto seed = params.rng_state[0]; + auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; + + clear(acc_dq); + + float alibi_slope = + !Has_alibi ? 0.0f + : reinterpret_cast(params.alibi_slopes_ptr) + [bidb * params.alibi_slopes_batch_stride + bidh] / + params.scale_softmax; + + for (; n_block >= 0; --n_block) { + Tensor acc_s = partition_fragment_C( + tiled_mma_sdp, + Shape, Int>{}); // (MMA=4, MMA_M_SdP, MMA_N) + clear(acc_s); + flash::cp_async_wait<0>(); + __syncthreads(); - // Prologue - - Tensor tdOrdO = make_fragment_like(tdOgdO); - Tensor tdOrO = make_fragment_like(tdOgO); - - // TODO: Might need to exit early and write 0 to gdQ. - - flash::copy( - gmem_tiled_copy_dO, tdOgdO, tdOrdO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - flash::copy( - gmem_tiled_copy_dO, tdOgO, tdOrO, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - - Tensor tQrQ = make_fragment_like(tQgQ); - flash::copy( - gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ, binfo.actual_seqlen_q - m_block * kBlockM - ); - - int n_block = n_block_max - 1; - if (n_block % 2 == 1) { - tKsK.data() = tKsK.data() + size(sK); - tSsK.data() = tSsK.data() + size(sK); - tdQsKt.data() = tdQsKt.data() + size(sK); + flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, + smem_thr_copy_KV); + + // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, + // MMA_N)) + Tensor scores = make_tensor( + acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); + + if (Has_alibi) { + flash::apply_alibi( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + binfo.actual_seqlen_q, AtomLayoutMS * 16, alibi_slope); } - flash::copy( - gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - flash::copy( - gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN - ); - - Tensor caccS = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_N) -> (blk_m,blk_n) - Tensor taccScS = thr_mma_sdp.partition_C(caccS); // (MMA,MMA_N,MMA_N) - static_assert(decltype(size<0>(taccScS))::value == 4); - // Convert to ((2, 2), MMA_N, MMA_N) then take only the row indices. - Tensor taccScS_row = logical_divide(taccScS, Shape<_2>{})(make_coord(0, _), _, 0); - Tensor lse = make_tensor(Shape>{}); - #pragma unroll - for (int mi = 0; mi < size(lse); ++mi) { - const int row = get<0>(taccScS_row(mi)); - lse(mi) = row < binfo.actual_seqlen_q - m_block * kBlockM ? gLSE(row) : 0; + // We don't need to mask out the elements beyond actual_seqlen_k, because + // acc_s would be some finite value for those indices. In the end when we + // multiply with K to get dQ, the corresponding values of K would be 0, so + // the result would still be correct. 
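The causal branch just below only does per-element masking for tiles that straddle the diagonal. The underlying visibility rule is bottom-right aligned (the last query row attends to the last key column), which can be sketched as:

    // Query row i may attend to key column j iff j <= i + seqlen_k - seqlen_q.
    inline bool causal_visible(int i, int j, int seqlen_q, int seqlen_k) {
      return j <= i + seqlen_k - seqlen_q;
    }

    // A (kBlockM x kBlockN) tile can skip masking when even its last key
    // column is visible to its first query row; this mirrors, conservatively,
    // the m_block/n_block comparison below.
    inline bool tile_needs_mask(int m_block, int n_block, int kBlockM,
                                int kBlockN, int seqlen_q, int seqlen_k) {
      return !causal_visible(m_block * kBlockM,
                             (n_block + 1) * kBlockN - 1,
                             seqlen_q, seqlen_k);
    }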
+ if (Is_causal && m_block * kBlockM < (n_block + 1) * kBlockN) { + flash::apply_mask_causal( + scores, + n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, + binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), + // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % + // AtomLayoutMS * 16 + (tidx % 32) / 4, + binfo.actual_seqlen_q, AtomLayoutMS * 16); } - cute::cp_async_fence(); - - Tensor dP_sum = make_fragment_like(lse); - cute::copy(tdOrdO, tdOsdO); - dot_do_o( - tdOrdO, tdOrO, sdPsum, - Kernel_traits::kNThreads / (Kernel_traits::kGmemThreadsPerRow), params.p_dropout - ); - __syncthreads(); - #pragma unroll - for (int mi = 0; mi < size(dP_sum); ++mi) { dP_sum(mi) = sdPsum(get<0>(taccScS_row(mi))); } - - auto seed = params.rng_state[0]; - auto offset = params.rng_state[1] + (bidb * params.h + bidh) * 32 + tidx % 32; - - clear(acc_dq); - - float alibi_slope = !Has_alibi ? 0.0f : reinterpret_cast(params.alibi_slopes_ptr)[bidb * params.alibi_slopes_batch_stride + bidh] / params.scale_softmax; - - for (; n_block >= 0; --n_block) { - Tensor acc_s = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_M_SdP, MMA_N) - clear(acc_s); - flash::cp_async_wait<0>(); - __syncthreads(); - - flash::gemm(acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); - - // Reshape acc_s from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) - Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout())); - - if (Has_alibi) { - flash::apply_alibi( - scores, - n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, - m_block * kBlockM + get<0>(taccScS_row(0)), - binfo.actual_seqlen_q, - AtomLayoutMS * 16, - alibi_slope - ); - } - - // We don't need to mask out the elements beyond actual_seqlen_k, because acc_s would - // be some finite value for those indices. In the end when we multiply with K to get dQ, - // the corresponding values of K would be 0, so the result would still be correct. - if (Is_causal && m_block * kBlockM < (n_block + 1) * kBlockN) { - flash::apply_mask_causal(scores, n_block * kBlockN + (tidx / 32 / AtomLayoutMS) * MMA_N_SdP * 16, - binfo.actual_seqlen_k, m_block * kBlockM + get<0>(taccScS_row(0)), - // binfo.actual_seqlen_k, m_block * kBlockM + (tidx / 32) % AtomLayoutMS * 16 + (tidx % 32) / 4, - binfo.actual_seqlen_q, - AtomLayoutMS * 16); - } - - // Compute the exponential value. - flash::scale_apply_exp2(scores, lse, params.scale_softmax_log2); - if (Is_dropout) { - int warp_id = tidx / 32; - int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; - // Need col to be multiples of 32, since we're doing dropout with block of 16 x 32 - static_assert(MMA_N_SdP % 2 == 0); - int block_col_idx = n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); - Tensor scores_dropped = make_tensor(scores.data(), flash::convert_layout_rowcol_Aregs(scores.layout())); - flash::apply_dropout( - scores_dropped, params.p_dropout_in_uint8_t, seed, offset, - block_row_idx, block_col_idx, AtomLayoutMS - ); - } - // Convert scores from fp32 to fp16/bf16 - Tensor rP = !Is_dropout - ? flash::convert_type(scores) - : flash::convert_type_relu(scores); - // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, MMA_N / 2) - // if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using m16n8k8. 
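Both the old and the reformatted dropout paths here rely on sign-bit encoding: the backward pass replays the forward Philox stream, negates dropped probabilities instead of zeroing them, lets convert_type_relu clamp negatives to 0 for the P used in the dV gemm, and keeps the sign around for the dS pointwise step (the `p >= 0` test). A hedged sketch of just the encoding (the byte threshold is assumed to be floor(keep_prob * 255), mirroring p_dropout_in_uint8_t; the random bytes stand in for the Philox draw):

    void dropout_sign_encode(float *p, int n, const unsigned char *rnd,
                             unsigned char keep_u8) {
      for (int i = 0; i < n; ++i)
        if (rnd[i] > keep_u8)
          p[i] = -p[i];   // dropped: mark via the sign bit, don't zero
    }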
- Tensor tPrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs(rP.layout())); - Tensor tPaP = smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); - - Tensor acc_dp = partition_fragment_C(tiled_mma_sdp, Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) - CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA - CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA - - clear(acc_dp); - flash::gemm(acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, - smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, smem_thr_copy_KV); - - // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, MMA_N)) - Tensor dS = make_tensor(acc_dp.data(), scores.layout()); - auto pointwise_mult = [](float p, float dp, float d) { - return p * (!Is_dropout || p >= 0 ? dp - d : d); - }; - #pragma unroll - for (int mi = 0; mi < size<0>(dS); ++mi) { - #pragma unroll - for (int ni = 0; ni < size<1>(dS); ++ni) { - dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); - } - } - - Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); - // Convert dS from fp32 to fp16 - Tensor tdSrdS = flash::convert_type(dS_reshaped); - Tensor tdSadS = smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) - cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); - __syncthreads(); - - if (n_block > 0) { - // Double buffer for sK - const int sK_offset = n_block % 2 == 0 ? size(sK) : -size(sK); - tKsK.data() = tKsK.data() + sK_offset; - tSsK.data() = tSsK.data() + sK_offset; - // Advance gK, gV - tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); - tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); - flash::copy(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV); - flash::copy(gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV); - // This cp_async_fence needs to be in the if block, otherwise the synchronization - // isn't right and we get race conditions. - cute::cp_async_fence(); - } - - Tensor acc_dv = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - clear(acc_dv); - flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); - // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(acc_dv); } - tdVgdVaccum.data() = tdVgdVaccum.data() + (-int(kBlockN * params.d_rounded)); - #pragma unroll - for (int i = 0; i < size(acc_dv); ++i) { atomicAdd(&tdVgdVaccum(i), acc_dv(i)); } - - __syncthreads(); - Tensor acc_dk = partition_fragment_C(tiled_mma_dkv, Shape, Int>{}); // MMA, MMA_N, MMA_K - clear(acc_dk); - flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, - smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, smem_thr_copy_QdOt); - tdKgdKaccum.data() = tdKgdKaccum.data() + (-int(kBlockN * params.d_rounded)); - #pragma unroll - for (int i = 0; i < size(acc_dk); ++i) { atomicAdd(&tdKgdKaccum(i), acc_dk(i)); } - - flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, - smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, smem_thr_copy_Kt); - // Double buffer for sK - tdQsKt.data() = tdQsKt.data() + (n_block % 2 == 0 ? size(sK) : -size(sK)); - + // Compute the exponential value. 
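The scale_apply_exp2 call below rebuilds P from the raw scores and the log-sum-exp saved by the forward pass, so no softmax reduction is rerun in the backward. In natural-log form the identity is P(i,j) = exp(scale * S(i,j) - LSE(i)); the kernel uses a base-2 variant with params.scale_softmax_log2 because exp2 is cheaper. A standalone sketch of the math (plain arrays, names illustrative):

    #include <math.h>

    void rebuild_probs(float *scores, const float *lse_row,
                       int rows, int cols, float softmax_scale) {
      for (int mi = 0; mi < rows; ++mi)
        for (int ni = 0; ni < cols; ++ni)
          // A row whose LSE was loaded as INFINITY (as the 1colblock
          // variant does for out-of-bounds rows) comes out as exactly 0.
          scores[mi * cols + ni] =
              expf(softmax_scale * scores[mi * cols + ni] - lse_row[mi]);
    }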
+ flash::scale_apply_exp2(scores, lse, + params.scale_softmax_log2); + if (Is_dropout) { + int warp_id = tidx / 32; + int block_row_idx = m_block * (kBlockM / 16) + warp_id % AtomLayoutMS; + // Need col to be multiples of 32, since we're doing dropout with block of + // 16 x 32 + static_assert(MMA_N_SdP % 2 == 0); + int block_col_idx = + n_block * (kBlockN / 32) + (warp_id / AtomLayoutMS) * (MMA_N_SdP / 2); + Tensor scores_dropped = make_tensor( + scores.data(), + flash::convert_layout_rowcol_Aregs( + scores.layout())); + flash::apply_dropout( + scores_dropped, params.p_dropout_in_uint8_t, seed, offset, + block_row_idx, block_col_idx, AtomLayoutMS); + } + // Convert scores from fp32 to fp16/bf16 + Tensor rP = !Is_dropout ? flash::convert_type(scores) + : flash::convert_type_relu(scores); + // Reshape rP from (nrow=(2, MMA_N), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_N, + // MMA_N / 2) if using m16n8k16 or ((2, 2, 1), MMA_N, MMA_N) if using + // m16n8k8. + Tensor tPrP = make_tensor( + rP.data(), + flash::convert_layout_rowcol_Aregs( + rP.layout())); + Tensor tPaP = + smem_thr_copy_PdS.retile_S(tPrP); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tPaP, tPsP); + + Tensor acc_dp = partition_fragment_C( + tiled_mma_sdp, + Shape, Int>{}); // (MMA=4, MMA_N, MMA_N) + CUTE_STATIC_ASSERT_V(size<0>(acc_dp) == size<0>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(acc_dp) == size<1>(acc_s)); // MMA + CUTE_STATIC_ASSERT_V(size<2>(acc_dp) == size<2>(acc_s)); // MMA + + clear(acc_dp); + flash::gemm(acc_dp, tdPrdO, tdPrV, tdPsdO, tdPsV, tiled_mma_sdp, + smem_tiled_copy_QdO, smem_tiled_copy_KV, smem_thr_copy_QdO, + smem_thr_copy_KV); + + // Reshape acc_dp from (MMA=4, MMA_N, MMA_N) to (col=(2, MMA_N), row=(2, + // MMA_N)) + Tensor dS = make_tensor(acc_dp.data(), scores.layout()); + auto pointwise_mult = [](float p, float dp, float d) { + return p * (!Is_dropout || p >= 0 ? 
dp - d : d); + }; +#pragma unroll + for (int mi = 0; mi < size<0>(dS); ++mi) { +#pragma unroll + for (int ni = 0; ni < size<1>(dS); ++ni) { + dS(mi, ni) = pointwise_mult(scores(mi, ni), dS(mi, ni), dP_sum(mi)); + } } - // Epilogue - - #pragma unroll - for (int i = 0; i < size(acc_dq); ++i) { acc_dq(i) *= params.scale_softmax_rp_dropout; } - // Convert acc_dq from fp32 to fp16 - Tensor rdQ = flash::convert_type(acc_dq); - - Tensor sdQ = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutdQ{}); - - // Partition sdV and sdK to match the accumulator partitioning - auto smem_tiled_copy_dQ = make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); - auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor taccdQrdQ = smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) - Tensor taccdQsdQ = smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) - + Tensor dS_reshaped = make_tensor(dS.data(), acc_dp.layout()); + // Convert dS from fp32 to fp16 + Tensor tdSrdS = flash::convert_type(dS_reshaped); + Tensor tdSadS = + smem_thr_copy_PdS.retile_S(tdSrdS); // ((Atom,AtomNum), MMA_N, MMA_N) + cute::copy(smem_tiled_copy_PdS, tdSadS, tdSsdS); __syncthreads(); - cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); - const index_t row_offset_dq = binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) - + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; - Tensor gdQ = make_tensor(make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), - Shape, Int>{}, - make_stride(params.dq_row_stride, _1{})); + if (n_block > 0) { + // Double buffer for sK + const int sK_offset = n_block % 2 == 0 ? size(sK) : -size(sK); + tKsK.data() = tKsK.data() + sK_offset; + tSsK.data() = tSsK.data() + sK_offset; + // Advance gK, gV + tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride)); + tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride)); + flash::copy(gmem_tiled_copy_QKV, tKgK, + tKsK, tKVcKV, tKVpKV); + flash::copy(gmem_tiled_copy_QKV, tVgV, + tVsV, tKVcKV, tKVpKV); + // This cp_async_fence needs to be in the if block, otherwise the + // synchronization isn't right and we get race conditions. 
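The prefetch block below, together with the cp_async_fence the comment insists on, forms a two-stage pipeline: while tile n_block is consumed from one half of the double-buffered sK, tile n_block - 1 streams into the other half. A toy CUDA version with plain loads and barriers standing in for cp.async and the fence/wait pair (illustrative only; assumes blockDim.x == 256 and ntiles * 256 elements in g):

    __global__ void double_buffered_sum(const float *g, float *out,
                                        int ntiles) {
      __shared__ float buf[2][256];
      int t = threadIdx.x;
      buf[0][t] = g[t];                    // prologue: load tile 0
      __syncthreads();
      float acc = 0.f;
      for (int i = 0; i < ntiles; ++i) {
        if (i + 1 < ntiles)                // prefetch the next tile...
          buf[(i + 1) & 1][t] = g[(i + 1) * 256 + t];
        acc += buf[i & 1][(t + 1) & 255];  // ...while consuming the current
        __syncthreads();                   // both sides done before the swap
      }
      out[t] = acc;
    }

cute's cp_async_fence/cp_async_wait pair plays the role of the barrier here, but for copies that proceed asynchronously with the gemm.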
+ cute::cp_async_fence(); + } - typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; - auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); - Tensor tdQsdQ = gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) - Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + Tensor acc_dv = partition_fragment_C( + tiled_mma_dkv, + Shape, Int>{}); // MMA, MMA_N, MMA_K + clear(acc_dv); + flash::gemm(acc_dv, tdVrPt, tdVrdO, tdVsPt, tdVsdOt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, + smem_thr_copy_QdOt); + // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { + // print(acc_dv); } + tdVgdVaccum.data() = + tdVgdVaccum.data() + (-int(kBlockN * params.d_rounded)); +#pragma unroll + for (int i = 0; i < size(acc_dv); ++i) { + atomicAdd(&tdVgdVaccum(i), acc_dv(i)); + } __syncthreads(); + Tensor acc_dk = partition_fragment_C( + tiled_mma_dkv, + Shape, Int>{}); // MMA, MMA_N, MMA_K + clear(acc_dk); + flash::gemm(acc_dk, tdKrdSt, tdKrQt, tdKsdSt, tdKsQt, tiled_mma_dkv, + smem_tiled_copy_PdSt, smem_tiled_copy_QdOt, smem_thr_copy_PdSt, + smem_thr_copy_QdOt); + tdKgdKaccum.data() = + tdKgdKaccum.data() + (-int(kBlockN * params.d_rounded)); +#pragma unroll + for (int i = 0; i < size(acc_dk); ++i) { + atomicAdd(&tdKgdKaccum(i), acc_dk(i)); + } - Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); - cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); - - Tensor cdQ = make_identity_tensor(Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) - Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); - Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); - if (!Is_even_K) { - #pragma unroll - for (int k = 0; k < size(tdQpdQ); ++k) { tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; } + flash::gemm(acc_dq, tdQrdS, tdQrKt, tdQsdS, tdQsKt, tiled_mma_dq, + smem_tiled_copy_dS, smem_tiled_copy_Kt, smem_thr_copy_dS, + smem_thr_copy_Kt); + // Double buffer for sK + tdQsKt.data() = tdQsKt.data() + (n_block % 2 == 0 ? 
size(sK) : -size(sK)); + } + + // Epilogue + +#pragma unroll + for (int i = 0; i < size(acc_dq); ++i) { + acc_dq(i) *= params.scale_softmax_rp_dropout; + } + // Convert acc_dq from fp32 to fp16 + Tensor rdQ = flash::convert_type(acc_dq); + + Tensor sdQ = make_tensor(sQ.data(), typename Kernel_traits::SmemLayoutdQ{}); + + // Partition sdV and sdK to match the accumulator partitioning + auto smem_tiled_copy_dQ = + make_tiled_copy_C(typename Kernel_traits::SmemCopyAtomdQ{}, tiled_mma_dq); + auto smem_thr_copy_dQ = smem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor taccdQrdQ = + smem_thr_copy_dQ.retile_S(rdQ); // ((Atom,AtomNum), MMA_N, MMA_N) + Tensor taccdQsdQ = + smem_thr_copy_dQ.partition_D(sdQ); // ((Atom,AtomNum),PIPE_M,PIPE_N) + + __syncthreads(); + cute::copy(smem_tiled_copy_dQ, taccdQrdQ, taccdQsdQ); + + const index_t row_offset_dq = + binfo.q_offset(params.dq_batch_stride, params.dq_row_stride, bidb) + + m_block * kBlockM * params.dq_row_stride + bidh * params.dq_head_stride; + Tensor gdQ = make_tensor( + make_gmem_ptr(reinterpret_cast(params.dq_ptr) + row_offset_dq), + Shape, Int>{}, + make_stride(params.dq_row_stride, _1{})); + + typename Kernel_traits::GmemTiledCopydQ gmem_tiled_copy_dQ; + auto gmem_thr_copy_dQ = gmem_tiled_copy_dQ.get_thread_slice(tidx); + Tensor tdQsdQ = + gmem_thr_copy_dQ.partition_S(sdQ); // ((Atom,AtomNum),ATOM_M,ATOM_N) + Tensor tdQgdQ = gmem_thr_copy_dQ.partition_D(gdQ); + + __syncthreads(); + + Tensor tdQrdQ = make_tensor(shape(tdQgdQ)); + cute::copy(gmem_tiled_copy_dQ, tdQsdQ, tdQrdQ); + + Tensor cdQ = make_identity_tensor( + Shape, Int>{}); // (BLK_M,BLK_K) -> (blk_m,blk_k) + Tensor tdQcdQ = gmem_thr_copy_dQ.partition_D(cdQ); + Tensor tdQpdQ = make_tensor(make_shape(size<2>(tdQgdQ))); + if (!Is_even_K) { +#pragma unroll + for (int k = 0; k < size(tdQpdQ); ++k) { + tdQpdQ(k) = get<1>(tdQcdQ(0, 0, k)) < params.d; } - // Clear_OOB_K must be false since we don't want to write zeros to gmem - flash::copy( - gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, tdQpdQ, binfo.actual_seqlen_q - m_block * kBlockM - ); + } + // Clear_OOB_K must be false since we don't want to write zeros to gmem + flash::copy(gmem_tiled_copy_dQ, tdQrdQ, tdQgdQ, tdQcdQ, + tdQpdQ, + binfo.actual_seqlen_q - m_block * kBlockM); } //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template inline __device__ void compute_dq_dk_dv(const Params ¶ms) { - // The block index for the batch. - const int bidb = blockIdx.x; - // const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.y; - // const int bidh = blockIdx.z; - // The thread index. - const int tidx = threadIdx.x; - - const int n_block_max = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - if (n_block_max == 1) { - compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); - } else { - // Iterating backward from n_block_max - 1 to 0 might save 1 register - compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block_max - 1); - for (int n_block = n_block_max - 2; n_block > 0; n_block--) { - compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block); - } - compute_dq_dk_dv_1colblock(params, bidb, bidh, 0); + // The block index for the batch. + const int bidb = blockIdx.x; + // const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.y; + // const int bidh = blockIdx.z; + // The thread index. 
+ const int tidx = threadIdx.x; + + const int n_block_max = + (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + if (n_block_max == 1) { + compute_dq_dk_dv_1colblock(params, bidb, + bidh, 0); + } else { + // Iterating backward from n_block_max - 1 to 0 might save 1 register + compute_dq_dk_dv_1colblock( + params, bidb, bidh, n_block_max - 1); + for (int n_block = n_block_max - 2; n_block > 0; n_block--) { + compute_dq_dk_dv_1colblock( + params, bidb, bidh, n_block); } + compute_dq_dk_dv_1colblock(params, bidb, + bidh, 0); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template inline __device__ void compute_dq_dk_dv_seqk_parallel(const Params ¶ms) { - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; - - // If deterministic, each thread block will do atomicAdd to a different dQ_accum buffer. - for (int n_block = blockIdx.x; n_block < (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; n_block += gridDim.x) { - compute_dq_dk_dv_1colblock(params, bidb, bidh, n_block); - } + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. + const int bidh = blockIdx.z; + + // If deterministic, each thread block will do atomicAdd to a different + // dQ_accum buffer. + for (int n_block = blockIdx.x; + n_block < + (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + n_block += gridDim.x) { + compute_dq_dk_dv_1colblock(params, bidb, bidh, + n_block); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template inline __device__ void compute_dq_dk_dv_seqq_parallel(const Params ¶ms) { - const int m_block = blockIdx.x; - // The block index for the batch. - const int bidb = blockIdx.y; - // The block index for the head. - const int bidh = blockIdx.z; + const int m_block = blockIdx.x; + // The block index for the batch. + const int bidb = blockIdx.y; + // The block index for the head. 
+ const int bidh = blockIdx.z; - compute_dq_dk_dv_1rowblock(params, bidb, bidh, m_block); + compute_dq_dk_dv_1rowblock(params, bidb, bidh, m_block); } //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h b/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h index a72664f32..4b4942dbb 100644 --- a/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h +++ b/external_libs/runtime/flash_attn/lib/flash_bwd_launch_template.h @@ -4,361 +4,496 @@ // #include -#include "static_switch.h" #include "flash.h" #include "flash_bwd_kernel.h" +#include "static_switch.h" -template +template __global__ void flash_bwd_dot_do_o_kernel(Flash_bwd_params params) { - flash::compute_dot_do_o(params); + flash::compute_dot_do_o(params); } -template +template __global__ void flash_bwd_clear_dkvaccum_kernel(Flash_bwd_params params) { - flash::clear_dKVaccum(params); + flash::clear_dKVaccum(params); } -template +template __global__ void flash_bwd_dq_dk_dv_loop_kernel(Flash_bwd_params params) { - flash::compute_dq_dk_dv(params); + flash::compute_dq_dk_dv(params); } -template -__global__ void flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel(Flash_bwd_params params) { - static_assert(!(Is_causal && Is_local)); // If Is_local is true, Is_causal should be false - flash::compute_dq_dk_dv_seqk_parallel(params); +template +__global__ void +flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel(Flash_bwd_params params) { + static_assert(!(Is_causal && + Is_local)); // If Is_local is true, Is_causal should be false + flash::compute_dq_dk_dv_seqk_parallel(params); } -template -__global__ void flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel(Flash_bwd_params params) { - flash::compute_dq_dk_dv_seqq_parallel(params); +template +__global__ void +flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel(Flash_bwd_params params) { + flash::compute_dq_dk_dv_seqq_parallel( + params); } -template -__global__ void flash_bwd_convert_dq_kernel(Flash_bwd_params params, const int nsplits) { - flash::convert_dQ(params, nsplits); +template +__global__ void flash_bwd_convert_dq_kernel(Flash_bwd_params params, + const int nsplits) { + flash::convert_dQ(params, nsplits); } -template +template __global__ void flash_bwd_convert_dkv_kernel(Flash_bwd_params params) { - flash::convert_dKV(params); + flash::convert_dKV(params); } -template -void run_flash_bwd_seqk_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid_m(num_m_block, params.b, params.h); - const int num_n_block = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - int gridDimx = num_n_block; - if (params.deterministic) { - // auto dprops = at::cuda::getCurrentDeviceProperties(); - cudaDeviceProp dprops; - cudaGetDeviceProperties(&dprops, 0); - gridDimx = (dprops.multiProcessorCount + params.b * params.h - 1) / (params.b * params.h); - } - dim3 grid_n(gridDimx, params.b, params.h); +template +void run_flash_bwd_seqk_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + const int num_m_block = + (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid_m(num_m_block, params.b, params.h); + const int num_n_block = + (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + int gridDimx = num_n_block; + if (params.deterministic) { + // auto dprops = 
at::cuda::getCurrentDeviceProperties(); + cudaDeviceProp dprops; + cudaGetDeviceProperties(&dprops, 0); + gridDimx = (dprops.multiProcessorCount + params.b * params.h - 1) / + (params.b * params.h); + } + dim3 grid_n(gridDimx, params.b, params.h); - if (!params.deterministic) { - flash_bwd_dot_do_o_kernel<<>>(params); - } else { - flash_bwd_dot_do_o_kernel<<>>(params); - } - // C10_CUDA_KERNEL_LAUNCH_CHECK(); + if (!params.deterministic) { + flash_bwd_dot_do_o_kernel + <<>>(params); + } else { + flash_bwd_dot_do_o_kernel + <<>>(params); + } + // C10_CUDA_KERNEL_LAUNCH_CHECK(); - // We want to specialize to is_even_MN and not just is_even_M, since in the case where N is not - // a multiple of kBlockN, we'll need to apply mask in the loop. - const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_q % Kernel_traits::kBlockM == 0 && params.seqlen_k % Kernel_traits::kBlockN == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1colblock; - // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !params.is_causal, Is_local, [&] { - BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { - // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. - // If head dim > 128, set IsEvenMNConst to false to reduce number of templates - // If Is_local, set Is_causal to false - auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; - // auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; - if (smem_size_dq_dk_dv >= 48 * 1024) { - cudaFuncSetAttribute( - kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv); - } - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); + // We want to specialize to is_even_MN and not just is_even_M, since in the + // case where N is not a multiple of kBlockN, we'll need to apply mask in the + // loop. + const bool is_even_MN = params.cu_seqlens_q == nullptr && + params.cu_seqlens_k == nullptr && + params.seqlen_q % Kernel_traits::kBlockM == 0 && + params.seqlen_k % Kernel_traits::kBlockN == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1colblock; + // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH( + (params.window_size_left >= 0 || params.window_size_right >= 0) && + !params.is_causal, + Is_local, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If not IsEvenKConst, we also set IsEvenMNConst to false to + // reduce number of templates. 
If head dim > 128, set + // IsEvenMNConst to false to reduce number of templates If + // Is_local, set Is_causal to false + auto kernel = &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel < + Kernel_traits, + Is_dropout, Is_causal, Is_local && !Is_causal, Has_alibi, + IsEvenMNConst && IsEvenKConst && !Is_local && + Kernel_traits::kHeadDim <= 128, + IsEvenKConst > ; + // auto kernel = + // &flash_bwd_dq_dk_dv_loop_seqk_parallel_kernel; + if (smem_size_dq_dk_dv >= 48 * 1024) { + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size_dq_dk_dv); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); }); - }); + }); }); + }); - auto kernel_dq = &flash_bwd_convert_dq_kernel; - if (Kernel_traits::kSmemdQSize >= 48 * 1024) { - cudaFuncSetAttribute( - kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemdQSize); - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemdQSize)); - } - kernel_dq<<>>(params, !params.deterministic ? 1 : gridDimx); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); + auto kernel_dq = &flash_bwd_convert_dq_kernel; + if (Kernel_traits::kSmemdQSize >= 48 * 1024) { + cudaFuncSetAttribute(kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, + Kernel_traits::kSmemdQSize); + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel_dq, cudaFuncAttributeMaxDynamicSharedMemorySize, + // Kernel_traits::kSmemdQSize)); + } + kernel_dq<<>>(params, !params.deterministic ? 1 : gridDimx); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); } -template -void run_flash_bwd_seqq_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - const int num_n_block = (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; - dim3 grid_n(num_n_block, params.b, params.h_k); - flash_bwd_clear_dkvaccum_kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); +template +void run_flash_bwd_seqq_parallel(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + const int num_n_block = + (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_traits::kBlockN; + dim3 grid_n(num_n_block, params.b, params.h_k); + flash_bwd_clear_dkvaccum_kernel + <<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); - const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid_m(num_m_block, params.b, params.h); - // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we need to check - // for cu_seqlens_k as well. - const bool is_even_N = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1rowblock; - // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - BOOL_SWITCH(is_even_N, IsEvenNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { - // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. 
- auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; - // auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; - if (smem_size_dq_dk_dv >= 48 * 1024) { - cudaFuncSetAttribute( - kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv); - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size_dq_dk_dv)); - } - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); + const int num_m_block = + (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid_m(num_m_block, params.b, params.h); + // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we + // need to check for cu_seqlens_k as well. + const bool is_even_N = params.cu_seqlens_q == nullptr && + params.cu_seqlens_k == nullptr && + params.seqlen_k % Kernel_traits::kBlockN == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + constexpr int smem_size_dq_dk_dv = Kernel_traits::kSmemSize1rowblock; + // printf("smem_size_dq_dk_dv = %d\n", smem_size_dq_dk_dv); + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_N, IsEvenNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce + // number of templates. + auto kernel = &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel < + Kernel_traits, + Is_dropout, Is_causal, Has_alibi, IsEvenNConst && IsEvenKConst, + IsEvenKConst > ; + // auto kernel = + // &flash_bwd_dq_dk_dv_loop_seqq_parallel_kernel; + if (smem_size_dq_dk_dv >= 48 * 1024) { + cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size_dq_dk_dv); + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, + // smem_size_dq_dk_dv)); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); }); + }); }); + }); - auto kernel_dkv = &flash_bwd_convert_dkv_kernel; - if (Kernel_traits::kSmemKVSize >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemKVSize)); - cudaFuncSetAttribute( - kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, Kernel_traits::kSmemKVSize); - } - kernel_dkv<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); + auto kernel_dkv = &flash_bwd_convert_dkv_kernel; + if (Kernel_traits::kSmemKVSize >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel_dkv, cudaFuncAttributeMaxDynamicSharedMemorySize, + // Kernel_traits::kSmemKVSize)); + cudaFuncSetAttribute(kernel_dkv, + cudaFuncAttributeMaxDynamicSharedMemorySize, + Kernel_traits::kSmemKVSize); + } + kernel_dkv<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); } -template -void run_flash_bwd(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - if (configure) return; - run_flash_bwd_seqk_parallel(params, stream, configure); +template +void run_flash_bwd(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + if (configure) + return; + run_flash_bwd_seqk_parallel(params, stream, + configure); } -template -void run_mha_bwd_hdim32(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 32; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // 
C10_CUDA_CHECK(status_); - // } - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 2 * ((3 * 128 + 2 * 128) * Headdim + 2 * 128 * 128)) { // 104 KB - if constexpr(!Is_dropout) { // We can afford more registers to keep V in registers - run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - run_flash_bwd, Is_dropout>(params, stream, configure); - } - } else { // 96 KB - run_flash_bwd, Is_dropout>(params, stream, configure); - } - }); +template +void run_mha_bwd_hdim32(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 32; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= + 2 * ((3 * 128 + 2 * 128) * Headdim + 2 * 128 * 128)) { // 104 KB + if constexpr (!Is_dropout) { // We can afford more registers to keep V in + // registers + run_flash_bwd, + Is_dropout>(params, stream, configure); + } else { + run_flash_bwd, + Is_dropout>(params, stream, configure); + } + } else { // 96 KB + run_flash_bwd, + Is_dropout>(params, stream, configure); + } + }); } -template -void run_mha_bwd_hdim64(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 64; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // Changing AtomLayoutMdQ from 2 to 4 takes the same time - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // This is slightly faster. We want to split M more so we need fewer registers to store LSE. 
-        if (max_smem_per_block >= 144 * 1024) {
-            run_flash_bwd, Is_dropout>(params, stream, configure);
-            // This has a lot of register spilling
-            // run_flash_bwd, Is_dropout>(params, stream, configure);
-        } else {
-            // if (params.h == params.h_k) {
-                // run_flash_bwd, Is_dropout>(params, stream, configure);
-                run_flash_bwd, Is_dropout>(params, stream, configure);
-                // run_flash_bwd, Is_dropout>(params, stream, configure);
-                // run_flash_bwd, Is_dropout>(params, stream, configure);
-            // } else {
-                // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure);
-            // }
-        }
-    });
-    // run_flash_bwd>(params, stream, configure);
-    // run_flash_bwd>(params, stream, configure);
-    // run_flash_bwd>(params, stream, configure);
-    // run_flash_bwd>(params, stream, configure);
-    // M=128, N=64 is quite slow, I think because we need to read/write dQaccum twice as many times
-    // run_flash_bwd>(params, stream, configure);
-    // run_flash_bwd>(params, stream, configure);
-    // run_flash_bwd>(params, stream, configure);
+template
+void run_mha_bwd_hdim64(Flash_bwd_params &params, cudaStream_t stream,
+                        const bool configure) {
+  constexpr static int Headdim = 64;
+  int device;
+  cudaGetDevice(&device);
+  int max_smem_per_block;
+  cudaError status_ = cudaDeviceGetAttribute(
+      &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device);
+  // if (status_ != cudaSuccess) {
+  //   C10_CUDA_CHECK(status_);
+  // }
+  // printf("max_smem_per_block = %d\n", max_smem_per_block);
+  BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] {
+    // Changing AtomLayoutMdQ from 2 to 4 takes the same time
+    // run_flash_bwd>(params, stream, configure);
+    // run_flash_bwd>(params, stream, configure);
+    // run_flash_bwd>(params, stream, configure);
+    // run_flash_bwd, Is_dropout>(params, stream, configure);
+    // This is slightly faster. We want to split M more so we need fewer
+    // registers to store LSE.
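+    // Hedged note: the 144 KB threshold below corresponds to the shared
+    // memory footprint of the larger kernel-traits configuration; e.g. an
+    // A100 (163 KB opt-in smem per block) takes that branch, while sm86/sm89
+    // parts (99 KB opt-in) fall through to the smaller tile. The exact tile
+    // shapes come from the (elided) Flash_bwd_kernel_traits arguments.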
+ if (max_smem_per_block >= 144 * 1024) { + run_flash_bwd, + Is_dropout>(params, stream, configure); + // This has a lot of register spilling + // run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + // if (params.h == params.h_k) { + // run_flash_bwd, Is_dropout>(params, stream, configure); + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // } else { + // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, + // configure); + // } + } + }); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); M=128, N=64 is quite slow, I think + // because we need to read/write dQaccum twice as many times + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); + // run_flash_bwd>(params, stream, configure); } -template -void run_mha_bwd_hdim96(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 96; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); +template +void run_mha_bwd_hdim96(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 96; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + // if (params.h == params.h_k) { + if (max_smem_per_block >= 116 * 1024) { + if constexpr (!Is_dropout) { // 92KB + run_flash_bwd, + Is_dropout>(params, stream, configure); + } else { // 116 KB + // This is faster for dropout since we don't have many registers to + // spare + run_flash_bwd, + Is_dropout>(params, stream, configure); + } + } else { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } + // } else { + // run_flash_bwd_seqq_parallel>(params, stream, configure); // } - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // if (params.h == params.h_k) { - if (max_smem_per_block >= 116 * 1024) { - if constexpr(!Is_dropout) { // 92KB - run_flash_bwd, Is_dropout>(params, stream, configure); - } else { // 116 KB - // This is faster for dropout since we don't have many registers to spare - run_flash_bwd, Is_dropout>(params, stream, configure); - } - } else { - run_flash_bwd, Is_dropout>(params, stream, configure); - } - // } else { - // run_flash_bwd_seqq_parallel>(params, stream, configure); - // } - }); + }); } -template -void run_mha_bwd_hdim128(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 128; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != 
cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - // if (params.h == params.h_k) { - // run_flash_bwd>(params, stream, configure); - // This is faster, in the case of sequence-parallel bwd (where we need fewer registers). - // Out of these three, the 2nd one is slightly faster (2% faster than the first). Idk why. - // run_flash_bwd>(params, stream, configure); - if (max_smem_per_block >= 144 * 1024) { - run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - // run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - // run_flash_bwd, Is_dropout>(params, stream, configure); - run_flash_bwd, Is_dropout>(params, stream, configure); - } - // run_flash_bwd>(params, stream, configure); +template +void run_mha_bwd_hdim128(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 128; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + // if (params.h == params.h_k) { + // run_flash_bwd>(params, stream, configure); This is faster, in the case + // of sequence-parallel bwd (where we need fewer registers). Out of these + // three, the 2nd one is slightly faster (2% faster than the first). Idk + // why. 
run_flash_bwd>(params, stream, configure); + if (max_smem_per_block >= 144 * 1024) { + run_flash_bwd, + Is_dropout>(params, stream, configure); + // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd_seqk_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd_seqq_parallel, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + // run_flash_bwd, Is_dropout>(params, stream, configure); + } else { + // run_flash_bwd, Is_dropout>(params, stream, configure); + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } + // run_flash_bwd>(params, stream, configure); - // run_flash_bwd>(params, stream, configure); - // } else { - // run_flash_bwd_seqq_parallel>(params, stream, configure); - // } - }); + // run_flash_bwd>(params, stream, configure); + // } else { + // run_flash_bwd_seqq_parallel>(params, stream, configure); + // } + }); } -template -void run_mha_bwd_hdim160(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 160; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 116 * 1024) { - run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - run_flash_bwd, Is_dropout>(params, stream, configure); - } - }); +template +void run_mha_bwd_hdim160(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 160; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 116 * 1024) { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } else { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } + }); } -template -void run_mha_bwd_hdim192(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 192; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 136 * 1024) { - run_flash_bwd, Is_dropout>(params, stream, configure); - } else { - run_flash_bwd, Is_dropout>(params, stream, configure); - } - }); +template +void run_mha_bwd_hdim192(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 192; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 136 * 1024) { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, 
stream, configure); + } else { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } + }); } -template -void run_mha_bwd_hdim224(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 224; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - run_flash_bwd, Is_dropout>(params, stream, configure); - }); +template +void run_mha_bwd_hdim224(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 224; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + }); } -template -void run_mha_bwd_hdim256(Flash_bwd_params ¶ms, cudaStream_t stream, const bool configure) { - constexpr static int Headdim = 256; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - if (max_smem_per_block >= 176 * 1024) { // H100 - run_flash_bwd, Is_dropout>(params, stream, configure); - } else { // A100, we don't do double buffering to save smem - run_flash_bwd, Is_dropout>(params, stream, configure); - } - }); +template +void run_mha_bwd_hdim256(Flash_bwd_params ¶ms, cudaStream_t stream, + const bool configure) { + constexpr static int Headdim = 256; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + if (max_smem_per_block >= 176 * 1024) { // H100 + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } else { // A100, we don't do double buffering to save smem + run_flash_bwd< + Flash_bwd_kernel_traits, + Is_dropout>(params, stream, configure); + } + }); } diff --git a/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h b/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h index 75ad04499..40840ae15 100644 --- a/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h +++ b/external_libs/runtime/flash_attn/lib/flash_fwd_launch_template.h @@ -6,351 +6,501 @@ // #include -#include "static_switch.h" #include "flash.h" #include "flash_fwd_kernel.h" +#include "static_switch.h" -template +template __global__ void flash_fwd_kernel(Flash_fwd_params params) { - static_assert(!(Is_causal && Is_local)); // If Is_local is true, Is_causal should be false - flash::compute_attn(params); + static_assert(!(Is_causal && + Is_local)); // If Is_local is true, Is_causal should be false + flash::compute_attn(params); } -template +template __global__ void flash_fwd_splitkv_kernel(Flash_fwd_params params) { - flash::compute_attn_splitkv(params); + flash::compute_attn_splitkv(params); } -template +template __global__ void flash_fwd_splitkv_combine_kernel(Flash_fwd_params params) { - static_assert(Log_max_splits >= 1); - flash::combine_attn_seqk_parallel(params); + static_assert(Log_max_splits >= 1); + flash::combine_attn_seqk_parallel(params); } -template +template void run_flash_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr size_t smem_size = Kernel_traits::kSmemSize; - // printf("smem_size = %d\n", smem_size); + constexpr size_t 
smem_size = Kernel_traits::kSmemSize; + // printf("smem_size = %d\n", smem_size); - // Work-around for gcc 7. It doesn't like nested BOOL_SWITCH. - // https://github.com/kokkos/kokkos-kernels/issues/349 - // https://github.com/HazyResearch/flash-attention/issues/21 + // Work-around for gcc 7. It doesn't like nested BOOL_SWITCH. + // https://github.com/kokkos/kokkos-kernels/issues/349 + // https://github.com/HazyResearch/flash-attention/issues/21 - const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid(num_m_block, params.b, params.h); - const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - const bool return_softmax = params.p_ptr != nullptr; - BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !Is_causal, Is_local, [&] { - BOOL_SWITCH(return_softmax, ReturnSoftmaxConst, [&] { - BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { - // Will only return softmax if dropout, to reduce compilation time. - // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. - // If return_softmax, set IsEvenMNConst to false to reduce number of templates - // If head dim > 128, set IsEvenMNConst to false to reduce number of templates - // If Is_local, set Is_causal to false - auto kernel = &flash_fwd_kernel; - // auto kernel = &flash_fwd_kernel; - // printf("IsEvenMNConst = %d, IsEvenKConst = %d, Is_local = %d, Is_causal = %d, ReturnSoftmaxConst = %d, Is_dropout = %d\n", int(IsEvenMNConst), int(IsEvenKConst), int(Is_local), int(Is_causal), int(ReturnSoftmaxConst), int(Is_dropout)); - // auto kernel = &flash_fwd_kernel; - if (smem_size >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); - cudaFuncSetAttribute( - kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); - } - // int ctas_per_sm; - // cudaError status_ = cudaOccupancyMaxActiveBlocksPerMultiprocessor( - // &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size); - // printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), ctas_per_sm); - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); + const int num_m_block = + (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid(num_m_block, params.b, params.h); + const bool is_even_MN = params.cu_seqlens_q == nullptr && + params.cu_seqlens_k == nullptr && + params.seqlen_k % Kernel_traits::kBlockN == 0 && + params.seqlen_q % Kernel_traits::kBlockM == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + const bool return_softmax = params.p_ptr != nullptr; + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH( + (params.window_size_left >= 0 || params.window_size_right >= 0) && + !Is_causal, + Is_local, [&] { + BOOL_SWITCH(return_softmax, ReturnSoftmaxConst, [&] { + BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // Will only return softmax if dropout, to reduce compilation + // time. If not IsEvenKConst, we also set IsEvenMNConst to false + // to reduce number of templates. 
If return_softmax, set + // IsEvenMNConst to false to reduce number of templates If head + // dim > 128, set IsEvenMNConst to false to reduce number of + // templates If Is_local, set Is_causal to false + auto kernel = &flash_fwd_kernel < Kernel_traits, Is_dropout, + Is_causal, Is_local && !Is_causal, Has_alibi, + IsEvenMNConst && IsEvenKConst && !Is_local && + !ReturnSoftmaxConst && Kernel_traits::kHeadDim <= 128, + IsEvenKConst, ReturnSoftmaxConst && Is_dropout > ; + // auto kernel = &flash_fwd_kernel; + // printf("IsEvenMNConst = %d, IsEvenKConst = %d, Is_local = %d, + // Is_causal = %d, ReturnSoftmaxConst = %d, Is_dropout = %d\n", + // int(IsEvenMNConst), int(IsEvenKConst), int(Is_local), + // int(Is_causal), int(ReturnSoftmaxConst), int(Is_dropout)); + // auto kernel = &flash_fwd_kernel; + if (smem_size >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, + // smem_size)); + cudaFuncSetAttribute( + kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size); + } + // int ctas_per_sm; + // cudaError status_ = + // cudaOccupancyMaxActiveBlocksPerMultiprocessor( + // &ctas_per_sm, kernel, Kernel_traits::kNThreads, + // smem_size); + // printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), + // ctas_per_sm); + kernel<<>>( + params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); }); - }); + }); }); + }); } -template +template void run_flash_splitkv_fwd(Flash_fwd_params ¶ms, cudaStream_t stream) { - static_assert(!Kernel_traits::Is_Q_in_regs, "SplitKV implementation does not support Is_Q_in_regs"); - static_assert(!Kernel_traits::Share_Q_K_smem, "SplitKV implementation does not support Share_Q_K_smem"); - constexpr size_t smem_size = Kernel_traits::kSmemSize; - const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; - dim3 grid(num_m_block, params.num_splits > 1 ? params.num_splits : params.b, params.num_splits > 1 ? params.b * params.h : params.h); - const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0; - const bool is_even_K = params.d == Kernel_traits::kHeadDim; - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - BOOL_SWITCH((params.window_size_left >= 0 || params.window_size_right >= 0) && !Is_causal, Is_local, [&] { - BOOL_SWITCH(params.num_splits > 1, Split, [&] { - BOOL_SWITCH(params.knew_ptr != nullptr, Append_KV, [&] { - BOOL_SWITCH(params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { - // If Append_KV, then we must have seqlen_offsets, which means cu_seqlens_k != nullptr. - // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates. 
- // If Is_local, set Is_causal to false - auto kernel = &flash_fwd_splitkv_kernel; - // auto kernel = &flash_fwd_splitkv_kernel; - // auto kernel = &flash_fwd_splitkv_kernel; - if (smem_size >= 48 * 1024) { - // C10_CUDA_CHECK(cudaFuncSetAttribute( - // kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); - cudaFuncSetAttribute( - kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); - } - kernel<<>>(params); - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - }); - }); + static_assert(!Kernel_traits::Is_Q_in_regs, + "SplitKV implementation does not support Is_Q_in_regs"); + static_assert(!Kernel_traits::Share_Q_K_smem, + "SplitKV implementation does not support Share_Q_K_smem"); + constexpr size_t smem_size = Kernel_traits::kSmemSize; + const int num_m_block = + (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM; + dim3 grid(num_m_block, params.num_splits > 1 ? params.num_splits : params.b, + params.num_splits > 1 ? params.b * params.h : params.h); + const bool is_even_MN = params.cu_seqlens_q == nullptr && + params.cu_seqlens_k == nullptr && + params.seqlen_k % Kernel_traits::kBlockN == 0 && + params.seqlen_q % Kernel_traits::kBlockM == 0; + const bool is_even_K = params.d == Kernel_traits::kHeadDim; + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] { + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + BOOL_SWITCH( + (params.window_size_left >= 0 || params.window_size_right >= 0) && + !Is_causal, + Is_local, [&] { + BOOL_SWITCH(params.num_splits > 1, Split, [&] { + BOOL_SWITCH(params.knew_ptr != nullptr, Append_KV, [&] { + BOOL_SWITCH( + params.alibi_slopes_ptr != nullptr, Has_alibi, [&] { + // If Append_KV, then we must have seqlen_offsets, which + // means cu_seqlens_k != nullptr. If not IsEvenKConst, + // we also set IsEvenMNConst to false to reduce number + // of templates. If Is_local, set Is_causal to false + auto kernel = &flash_fwd_splitkv_kernel < Kernel_traits, + Is_causal, Is_local && !Is_causal, Has_alibi, + IsEvenMNConst && !Append_KV && IsEvenKConst && + !Is_local && Kernel_traits::kHeadDim <= 128, + IsEvenKConst, Split, Append_KV > ; + // auto kernel = + // &flash_fwd_splitkv_kernel; auto kernel = + // &flash_fwd_splitkv_kernel; + if (smem_size >= 48 * 1024) { + // C10_CUDA_CHECK(cudaFuncSetAttribute( + // kernel, + // cudaFuncAttributeMaxDynamicSharedMemorySize, + // smem_size)); + cudaFuncSetAttribute( + kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size); + } + kernel<<>>(params); + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); }); + }); }); - }); + }); }); - if (params.num_splits > 1) { - // We want kBlockM to be as small as possible for more parallelism. - // With 128 threads we can load 512 elements at a time, so if headdim is divisible by 128, kBlockM = 4. - // If headdim is divisible by 64, then we set kBlockM = 8, etc. - constexpr static int kBlockM = Kernel_traits::kHeadDim % 128 == 0 ? 4 : (Kernel_traits::kHeadDim % 64 == 0 ? 
8 : 16); - dim3 grid_combine((params.b * params.h * params.seqlen_q + kBlockM - 1) / kBlockM); - BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { - if (params.num_splits <= 2) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 4) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 8) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 16) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 32) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 64) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } else if (params.num_splits <= 128) { - flash_fwd_splitkv_combine_kernel<<>>(params); - } - // C10_CUDA_KERNEL_LAUNCH_CHECK(); - }); - } + }); + if (params.num_splits > 1) { + // We want kBlockM to be as small as possible for more parallelism. + // With 128 threads we can load 512 elements at a time, so if headdim is + // divisible by 128, kBlockM = 4. If headdim is divisible by 64, then we set + // kBlockM = 8, etc. + constexpr static int kBlockM = + Kernel_traits::kHeadDim % 128 == 0 + ? 4 + : (Kernel_traits::kHeadDim % 64 == 0 ? 8 : 16); + dim3 grid_combine((params.b * params.h * params.seqlen_q + kBlockM - 1) / + kBlockM); + BOOL_SWITCH(is_even_K, IsEvenKConst, [&] { + if (params.num_splits <= 2) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 4) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 8) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 16) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 32) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 64) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } else if (params.num_splits <= 128) { + flash_fwd_splitkv_combine_kernel + <<>>(params); + } + // C10_CUDA_KERNEL_LAUNCH_CHECK(); + }); + } } -template -void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int kBlockM = 64; // Fixed for all head dimensions - // TD [2023-08-28]: nvcc segfaults for headdim 96 with block size 64 x 256, - // and for headdim 192 with block size 64 x 128. - // Also for headdim 160 with block size 64 x 128 after the rotary addition. - constexpr static int kBlockN = Headdim <= 64 ? 256 : (Headdim <= 128 ? 128 : 64); - run_flash_splitkv_fwd>(params, stream); +template +void run_mha_fwd_splitkv_dispatch(Flash_fwd_params ¶ms, + cudaStream_t stream) { + constexpr static int kBlockM = 64; // Fixed for all head dimensions + // TD [2023-08-28]: nvcc segfaults for headdim 96 with block size 64 x 256, + // and for headdim 192 with block size 64 x 128. + // Also for headdim 160 with block size 64 x 128 after the rotary addition. + constexpr static int kBlockN = + Headdim <= 64 ? 256 : (Headdim <= 128 ? 
128 : 64); + run_flash_splitkv_fwd< + Flash_fwd_kernel_traits>( + params, stream); } -template +template void run_mha_fwd_hdim32(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 32; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - }); + constexpr static int Headdim = 32; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); }); + }); } -template +template void run_mha_fwd_hdim64(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 64; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr(!Is_dropout) { - // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower - // Using block size (64 x 256) is 27% slower for seqlen=2k - // Using block size (256 x 64) is 85% slower for seqlen=2k, because of register spilling - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - }); + constexpr static int Headdim = 64; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr (!Is_dropout) { + // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower + // Using block size (64 x 256) is 27% slower for seqlen=2k + // Using block size (256 x 64) is 85% slower for seqlen=2k, because of + // register spilling + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } }); + }); } -template +template void run_mha_fwd_hdim96(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 96; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), - if (is_sm8x) { - if constexpr(!Is_causal) { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // These two are always slower - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - }); + constexpr static int Headdim = 96; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + 
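+  // Hedged sketch: with at::cuda unavailable in this standalone runtime, the
+  // commented-out check above could be restored with the plain CUDA runtime
+  // API (the same pattern this file already uses to query
+  // max_smem_per_block):
+  //   cudaDeviceProp dprops;
+  //   cudaGetDeviceProperties(&dprops, /*device=*/0);
+  //   bool is_sm8x = dprops.major == 8 && dprops.minor > 0;
+  // Hardcoding is_sm8x = true simply pins the sm8x tile heuristics below.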
BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's + // square), + if (is_sm8x) { + if constexpr (!Is_causal) { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); These two are always slower + // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); }); + }); } -template +template void run_mha_fwd_hdim128(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 128; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr(!Is_dropout) { - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), - // and 128 x 32 (48 KB smem) is the fastest for non-causal since we get 2 CTAs per SM. - if (is_sm8x) { - if constexpr(!Is_causal) { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // Using 8 warps (128 x 128 and 256 x 64) is 28% slower for seqlen=2k - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // 1st ones are good for H100, A100 - // 2nd one is good for A6000 bc we get slightly better occupancy - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - }); + constexpr static int Headdim = 128; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr (!Is_dropout) { + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's + // square), and 128 x 32 (48 KB smem) is the fastest for non-causal + // since we get 2 CTAs per SM. 
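+        // Rough smem arithmetic for the 128 x 32 tile (fp16, Headdim = 128),
+        // ignoring swizzle padding: sQ = 128 * 128 * 2 B = 32 KB and
+        // sK + sV = 2 * (32 * 128 * 2 B) = 16 KB, i.e. 48 KB total, so two
+        // such CTAs fit in the ~100 KB of smem per SM on sm86/sm89.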
+ if (is_sm8x) { + if constexpr (!Is_causal) { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); Using 8 warps (128 + // x 128 and 256 x 64) is 28% slower for seqlen=2k + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); 1st ones are good + // for H100, A100 2nd one is good for A6000 bc we get slightly better + // occupancy + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + } }); + }); } -template +template void run_mha_fwd_hdim160(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 160; - // auto dprops = at::cuda::getCurrentDeviceProperties(); - // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; - bool is_sm8x = true; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For A100, H100, 128 x 32 is the fastest. - // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square), - // and 128 x 64 with 8 warps is the fastest for non-causal. - if (is_sm8x) { - if constexpr(!Is_causal) { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - }); + constexpr static int Headdim = 160; + // auto dprops = at::cuda::getCurrentDeviceProperties(); + // bool is_sm8x = dprops->major == 8 && dprops->minor > 0; + bool is_sm8x = true; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For A100, H100, 128 x 32 is the fastest. + // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's + // square), and 128 x 64 with 8 warps is the fastest for non-causal. 
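+      // Rough footprints at Headdim = 160 (fp16, ignoring padding): the
+      // 64 x 64 tile needs sQ 20 KB + sK/sV 40 KB = 60 KB, and 128 x 64
+      // needs sQ 40 KB + sK/sV 40 KB = 80 KB, so either way the launcher
+      // relies on the opt-in dynamic smem path (cudaFuncSetAttribute) rather
+      // than the 48 KB static limit.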
+ if (is_sm8x) { + if constexpr (!Is_causal) { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); }); + }); } -template +template void run_mha_fwd_hdim192(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 192; - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if constexpr(!Is_dropout) { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - // run_flash_fwd>(params, stream); - }); + constexpr static int Headdim = 192; + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if constexpr (!Is_dropout) { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd>(params, stream); run_flash_fwd>(params, stream); + // run_flash_fwd>(params, stream); }); + }); } -template +template void run_mha_fwd_hdim224(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 224; - int device; - cudaGetDevice(&device); - int max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - // printf("max_smem_per_block = %d\n", max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64)) { // 112 KB - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // We can't do 128 x 32 with 8 warps because with headdim 224, kBlockKSmem = 32. - // If we have N = 32, there are only 1024 elements to load at once, where each load - // is 8 elements. This means we can only use 128 threads and not 256 threads. 
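// (Worked numbers for the constraint above: headdim 224 gives kBlockKSmem =
// 32, so a 32-row gmem tile holds 32 * 32 = 1024 elements; at 8 elements
// (16 bytes) per vectorized load that is 1024 / 8 = 128 loads, i.e. work for
// only 128 threads, which is why 8 warps (256 threads) cannot be used.)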
- // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - }); + constexpr static int Headdim = 224; + int device; + cudaGetDevice(&device); + int max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_block = %d\n", max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64)) { // 112 KB + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); We can't do 128 x 32 with 8 + // warps because with headdim 224, kBlockKSmem = 32. If we have N = 32, + // there are only 1024 elements to load at once, where each load is 8 + // elements. This means we can only use 128 threads and not 256 threads. + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); }); + }); } -template +template void run_mha_fwd_hdim256(Flash_fwd_params ¶ms, cudaStream_t stream) { - constexpr static int Headdim = 256; - int device; - cudaGetDevice(&device); - int max_smem_per_sm, max_smem_per_block; - cudaError status_ = cudaDeviceGetAttribute( - &max_smem_per_sm, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device); - status_ = cudaDeviceGetAttribute( - &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - // if (status_ != cudaSuccess) { - // C10_CUDA_CHECK(status_); - // } - // printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, max_smem_per_block); - BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { - BOOL_SWITCH(params.is_causal, Is_causal, [&] { - // For A100, we want to run with 128 x 64 (128KB smem). - // For H100 we want to run with 64 x 64 (96KB smem) since then we can get 2 CTAs per SM. - if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64) && max_smem_per_sm < 4 * Headdim * (64 + 2 * 64)) { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } else { - run_flash_fwd, Is_dropout, Is_causal>(params, stream); - } - // 64 KB - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - // 96 KB - // run_flash_fwd, Is_dropout, Is_causal>(params, stream); - }); + constexpr static int Headdim = 256; + int device; + cudaGetDevice(&device); + int max_smem_per_sm, max_smem_per_block; + cudaError status_ = cudaDeviceGetAttribute( + &max_smem_per_sm, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device); + status_ = cudaDeviceGetAttribute( + &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + // if (status_ != cudaSuccess) { + // C10_CUDA_CHECK(status_); + // } + // printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, + // max_smem_per_block); + BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] { + BOOL_SWITCH(params.is_causal, Is_causal, [&] { + // For A100, we want to run with 128 x 64 (128KB smem). + // For H100 we want to run with 64 x 64 (96KB smem) since then we can get + // 2 CTAs per SM. 
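// (Where these byte counts come from, assuming 2-byte fp16/bf16 elements:
// the forward kernel keeps a Q tile of kBlockM rows plus K and V tiles of
// kBlockN rows each, all Headdim columns wide, so smem is roughly
// 2 * Headdim * (kBlockM + 2 * kBlockN) bytes. For hdim224, 128 x 64 needs
// 2 * 224 * (128 + 2 * 64) = 114688 B = 112 KB; for hdim256, 128 x 64 needs
// 2 * 256 * 256 = 128 KB, while 64 x 64 fits in 2 * 256 * 192 = 96 KB,
// matching the A100 / H100 comment above.)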
+ if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64) && + max_smem_per_sm < 4 * Headdim * (64 + 2 * 64)) { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } else { + run_flash_fwd< + Flash_fwd_kernel_traits, + Is_dropout, Is_causal>(params, stream); + } + // 64 KB + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); 96 KB + // run_flash_fwd, Is_dropout, Is_causal>(params, stream); }); + }); } diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits.h b/external_libs/runtime/flash_attn/lib/kernel_traits.h index f000ff24d..cd1fae003 100644 --- a/external_libs/runtime/flash_attn/lib/kernel_traits.h +++ b/external_libs/runtime/flash_attn/lib/kernel_traits.h @@ -12,386 +12,393 @@ using namespace cute; -template +template struct Flash_kernel_traits { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using Element = elem_type; - static constexpr bool Has_cp_async = true; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using Element = elem_type; + static constexpr bool Has_cp_async = true; #else - using Element = cutlass::half_t; - static constexpr bool Has_cp_async = false; + using Element = cutlass::half_t; + static constexpr bool Has_cp_async = false; #endif - using ElementAccum = float; - using index_t = uint32_t; + using ElementAccum = float; + using index_t = uint32_t; -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using MMA_Atom_Arch = std::conditional_t< - std::is_same_v, - MMA_Atom, - MMA_Atom - >; - using ValLayoutMNK = Layout>; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using MMA_Atom_Arch = + std::conditional_t, + MMA_Atom, + MMA_Atom>; + using ValLayoutMNK = Layout>; #else - using MMA_Atom_Arch = MMA_Atom; - using ValLayoutMNK = Layout>; + using MMA_Atom_Arch = MMA_Atom; + using ValLayoutMNK = Layout>; #endif -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; #else - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; #endif }; // If Share_Q_K_smem is true, that forces Is_Q_in_regs to be true -template > +template > struct Flash_fwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; - static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; - - // The number of threads. - static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 
2 : 3; - - using TiledMma = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout,_1,_1>>, // 4x1x1 or 8x1x1 thread group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM - - using SmemLayoutAtomQ = decltype( - composition(Swizzle{}, - // This has to be kBlockKSmem, using kHeadDim gives wrong results for d=128 - Layout>, - Stride, _1>>{})); - using SmemLayoutQ = decltype(tile_to_shape( - SmemLayoutAtomQ{}, - Shape, Int>{})); - - using SmemLayoutKV = decltype(tile_to_shape( - SmemLayoutAtomQ{}, - Shape, Int>{})); - - // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 - using SmemLayoutAtomVtransposedNoSwizzle = Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomVtransposed = decltype( - composition(Swizzle{}, SmemLayoutAtomVtransposedNoSwizzle{})); - using SmemLayoutVtransposed = decltype(tile_to_shape( - SmemLayoutAtomVtransposed{}, - Shape, Int>{})); - // Maybe the VtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? - using SmemLayoutVtransposedNoSwizzle = decltype(tile_to_shape( - SmemLayoutAtomVtransposedNoSwizzle{}, - Shape, Int>{})); - // using SmemLayoutVtransposedNoSwizzle = decltype(SmemLayoutVtransposed{}.layout_fn()); - - using SmemLayoutAtomO = decltype( - composition(Swizzle{}, - Layout, Int>, - Stride, _1>>{})); - using SmemLayoutO = decltype(tile_to_shape( - SmemLayoutAtomO{}, - Shape, Int>{})); - using SmemCopyAtomO = Copy_Atom; - using SmemCopyAtomOaccum = Copy_Atom; - - static constexpr int kSmemQCount = size(SmemLayoutQ{}); - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemSize = Share_Q_K_smem ? std::max(kSmemQSize, kSmemKVSize) : kSmemQSize + kSmemKVSize; - - static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because of bank conflicts. - // For example, for d=128, smem is split into 2 "pages", each page takes care of columns - // 0-63 and 64-127. If we have 16 threads per row for gmem read, when we write to smem, - // thread 0 - 7 will write to the first page and thread 8 - 15 will write to the second page, - // to the same banks. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading - // from the same address by the same threadblock. This is slightly faster. 
- using Gmem_copy_struct = std::conditional_t< - Has_cp_async, - SM80_CP_ASYNC_CACHEGLOBAL, - DefaultCopy - >; - using GmemTiledCopyQKV = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); - using GmemLayoutAtomP = Layout, Int>, - Stride, _1>>; - - using GmemTiledCopyP = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store - - using GmemLayoutAtomOaccum = std::conditional_t< - kBlockKSmem == 32, - Layout, // Thread layout, 8 threads per row - Stride< _8, _1>>, - Layout, // Thread layout, 16 threads per row - Stride< _16, _1>> - >; - using GmemTiledCopyOaccum = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtomOaccum{}, - Layout>{})); // Val layout, 4 vals per store - using GmemLayoutAtomRotcossin = GmemLayoutAtom; - using GmemTiledCopyRotcossin = decltype( - make_tiled_copy(Copy_Atom, Element>{}, - GmemLayoutAtomRotcossin{}, - Layout>{})); // Val layout, 4 vals per load - using GmemTiledCopyRotcossinCont = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtomRotcossin{}, - Layout>{})); // Val layout, 8 vals per load + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; + static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = + kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + using TiledMma = + TiledMMA, _1, _1>>, // 4x1x1 or 8x1x1 thread + // group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for + // 16x16x16 MMA and LDSM + + using SmemLayoutAtomQ = decltype(composition( + Swizzle{}, + // This has to be kBlockKSmem, using kHeadDim gives wrong results for + // d=128 + Layout>, Stride, _1>>{})); + using SmemLayoutQ = decltype( + tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + + using SmemLayoutKV = decltype( + tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + + // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 + using SmemLayoutAtomVtransposedNoSwizzle = + Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomVtransposed = decltype(composition( + Swizzle{}, SmemLayoutAtomVtransposedNoSwizzle{})); + using SmemLayoutVtransposed = decltype(tile_to_shape( + SmemLayoutAtomVtransposed{}, Shape, Int>{})); + // Maybe the VtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? 
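// (Orientation on the Swizzle<kSwizzle, 3, 3> compositions used throughout
// these layouts: CuTe's Swizzle<B, M, S> XORs a B-bit field of the linear
// offset, taken S bits above base M, into bits [M, M + B), i.e. it permutes
// smem at 2^M = 8-element (16-byte) granularity so that consecutive rows of
// a 32- or 64-column tile start in different bank groups. This is a sketch
// of the intent; cute/swizzle.hpp has the exact bit manipulation.)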
+ using SmemLayoutVtransposedNoSwizzle = + decltype(tile_to_shape(SmemLayoutAtomVtransposedNoSwizzle{}, + Shape, Int>{})); + // using SmemLayoutVtransposedNoSwizzle = + // decltype(SmemLayoutVtransposed{}.layout_fn()); + + using SmemLayoutAtomO = decltype(composition( + Swizzle{}, + Layout, Int>, Stride, _1>>{})); + using SmemLayoutO = decltype( + tile_to_shape(SmemLayoutAtomO{}, Shape, Int>{})); + using SmemCopyAtomO = Copy_Atom; + using SmemCopyAtomOaccum = Copy_Atom; + + static constexpr int kSmemQCount = size(SmemLayoutQ{}); + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemSize = Share_Q_K_smem + ? std::max(kSmemQSize, kSmemKVSize) + : kSmemQSize + kSmemKVSize; + + static constexpr int kGmemElemsPerLoad = + sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, + "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because + // of bank conflicts. For example, for d=128, smem is split into 2 "pages", + // each page takes care of columns 0-63 and 64-127. If we have 16 threads per + // row for gmem read, when we write to smem, thread 0 - 7 will write to the + // first page and thread 8 - 15 will write to the second page, to the same + // banks. + static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, + "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout< + Shape, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we + // won't be reading from the same address by the same threadblock. This is + // slightly faster. + using Gmem_copy_struct = std::conditional_t< + Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; + using GmemTiledCopyQKV = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopyO = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRowP == 0, + "kNThreads must be a multiple of kGmemThreadsPerRowP"); + using GmemLayoutAtomP = Layout< + Shape, Int>, + Stride, _1>>; + + using GmemTiledCopyP = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store + + using GmemLayoutAtomOaccum = std::conditional_t< + kBlockKSmem == 32, + Layout, // Thread layout, 8 threads per row + Stride<_8, _1>>, + Layout, // Thread layout, 16 threads per row + Stride<_16, _1>>>; + using GmemTiledCopyOaccum = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomOaccum{}, + Layout>{})); // Val layout, 4 vals per store + using GmemLayoutAtomRotcossin = GmemLayoutAtom; + using GmemTiledCopyRotcossin = decltype(make_tiled_copy( + Copy_Atom, Element>{}, GmemLayoutAtomRotcossin{}, + Layout>{})); // Val layout, 4 vals per load + using GmemTiledCopyRotcossinCont = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomRotcossin{}, + Layout>{})); // Val layout, 8 vals per load }; -// Is_V_in_regs is an option to reduce smem usage, but will increase register pressue. -// No_double_buffer is another option to reduce smem usage, but will slow things down. 
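// (Concrete scale of that tradeoff, under assumed sizes: with fp16,
// kBlockN = 128 and Headdim = 64, the V tile is 128 * 64 * 2 B = 16 KB, so
// Is_V_in_regs frees about that much smem but parks the same data in the
// register files, e.g. 128 B (32 x 32-bit registers) per thread for a
// 128-thread block, which is the added register pressure referred to here.)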
-template > +// Is_V_in_regs is an option to reduce smem usage, but will increase register +// pressue. No_double_buffer is another option to reduce smem usage, but will +// slow things down. +template > struct Flash_bwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Is_V_in_regs = Is_V_in_regs_; - static constexpr bool No_double_buffer = No_double_buffer_; - - // The number of threads. - static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; - - static constexpr int AtomLayoutMSdP = AtomLayoutMSdP_; - static_assert(kNWarps % AtomLayoutMSdP == 0); - static_assert(kNWarps % AtomLayoutNdKV == 0); - static_assert(kNWarps % AtomLayoutMdQ == 0); - - using TiledMmaSdP = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM - - using TiledMmadKV = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM - - using TiledMmadQ = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, // 2x4x1 or 4x2x1 thread group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM - - using SmemLayoutAtomQdO = decltype( - composition(Swizzle{}, - Layout>, - Stride, _1>>{})); - using SmemLayoutQdO = decltype(tile_to_shape( - SmemLayoutAtomQdO{}, - make_shape(Int{}, Int{}))); - - using SmemLayoutAtomKV = decltype( - composition(Swizzle{}, - Layout, Int>, - Stride, _1>>{})); - using SmemLayoutKV = decltype(tile_to_shape( - // SmemLayoutAtomQdO{}, - SmemLayoutAtomKV{}, - make_shape(Int{}, Int{}))); - - using SmemLayoutAtomKtransposedNoSwizzle = Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomKtransposed = decltype( - composition(Swizzle{}, SmemLayoutAtomKtransposedNoSwizzle{})); - using SmemLayoutKtransposed = decltype(tile_to_shape( - SmemLayoutAtomKtransposed{}, - make_shape(Int{}, Int{}))); - // Maybe the KtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? - using SmemLayoutKtransposedNoSwizzle = decltype(tile_to_shape( - SmemLayoutAtomKtransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutKtransposedNoSwizzle = decltype(SmemLayoutKtransposed{}.layout_fn()); - - // TODO: generalize to other values of kBlockN - // TODO: what should be the Swizzle here? 3 is faster than 1, and 1 is faster than 2 - // static constexpr int kPBlockN = kBlockN; - static_assert(kBlockN >= 64); - // TD [2023-03-19]: Idk why kPBlockN = 16 and kSwizzlePdS=3 is the fastest. - static constexpr int kPBlockN = 64; - static_assert(kPBlockN == 16 || kPBlockN == 32 || kPBlockN == 64); - // static constexpr int kSwizzlePdS = kPBlockN == 16 ? 1 : (kPBlockN == 32 ? 
2 : 3); - static constexpr int kSwizzlePdS = 3; - using SmemLayoutAtomPdS = decltype( - composition(Swizzle{}, - Layout, Int>, - Stride, _1>>{})); - using SmemLayoutPdS = decltype(tile_to_shape( - SmemLayoutAtomPdS{}, - make_shape(Int{}, Int{}))); - using SmemLayoutAtomPdStransposedNoSwizzle = Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomPdStransposed = decltype( - composition(Swizzle{}, SmemLayoutAtomPdStransposedNoSwizzle{})); - using SmemLayoutPdStransposed = decltype(tile_to_shape( - SmemLayoutAtomPdStransposed{}, - make_shape(Int{}, Int{}))); - using SmemLayoutPdStransposedNoSwizzle = decltype(tile_to_shape( - SmemLayoutAtomPdStransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutPdStransposedNoSwizzle = decltype(SmemLayoutPdStransposed{}.layout_fn()); - using SmemCopyAtomPdS = Copy_Atom; - - using SmemLayoutAtomQdOtransposedNoSwizzle = Layout, Int>, - Stride<_1, Int>>; - using SmemLayoutAtomQdOtransposed = decltype( - composition(Swizzle{}, SmemLayoutAtomQdOtransposedNoSwizzle{})); - using SmemLayoutQdOtransposed = decltype(tile_to_shape( - SmemLayoutAtomQdOtransposed{}, - make_shape(Int{}, Int{}))); - using SmemLayoutQdOtransposedNoSwizzle = decltype(tile_to_shape( - SmemLayoutAtomQdOtransposedNoSwizzle{}, - make_shape(Int{}, Int{}))); - // using SmemLayoutQdOtransposedNoSwizzle = decltype(SmemLayoutQdOtransposed{}.layout_fn()); - - using SmemLayoutAtomdKV = decltype( - composition(Swizzle{}, - Layout>, - Stride, _1>>{})); - using SmemLayoutdKV = decltype(tile_to_shape( - SmemLayoutAtomdKV{}, - make_shape(Int{}, Int{}))); - using SmemCopyAtomdKV = Copy_Atom; - - using SmemLayoutAtomdQ = decltype( - composition(Swizzle{}, - Layout>, - Stride, _1>>{})); - using SmemLayoutdQ = decltype(tile_to_shape( - SmemLayoutAtomdQ{}, - make_shape(Int{}, Int{}))); - using SmemCopyAtomdQ = Copy_Atom; - - static constexpr int kSmemQdOCount = size(SmemLayoutQdO{}) * (No_double_buffer ? 2 : 3); // Double buffer for sQ - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemdSCount = size(SmemLayoutPdS{}); - static constexpr int kSmemPCount = size(SmemLayoutPdS{}); - static constexpr int kSmemdQCount = size(SmemLayoutdQ{}); - static constexpr int kSmemQdOSize = kSmemQdOCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemdSSize = kSmemdSCount * sizeof(Element); - static constexpr int kSmemPSize = kSmemPCount * sizeof(Element); - static constexpr int kSmemdQSize = kSmemdQCount * sizeof(Element); - static constexpr int kSmemSize = kSmemQdOSize - + (!Is_V_in_regs + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Is_V_in_regs = Is_V_in_regs_; + static constexpr bool No_double_buffer = No_double_buffer_; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = + kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 
64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + static constexpr int AtomLayoutMSdP = AtomLayoutMSdP_; + static_assert(kNWarps % AtomLayoutMSdP == 0); + static_assert(kNWarps % AtomLayoutNdKV == 0); + static_assert(kNWarps % AtomLayoutMdQ == 0); + + using TiledMmaSdP = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout, Int, _1>>, + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 + // MMA and LDSM + + using TiledMmadKV = TiledMMA< + typename Base::MMA_Atom_Arch, + Layout, Int, _1>>, + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 + // MMA and LDSM + + using TiledMmadQ = + TiledMMA, Int, + _1>>, // 2x4x1 or 4x2x1 thread group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for + // 16x16x16 MMA and LDSM + + using SmemLayoutAtomQdO = decltype(composition( + Swizzle{}, + Layout>, Stride, _1>>{})); + using SmemLayoutQdO = decltype(tile_to_shape( + SmemLayoutAtomQdO{}, make_shape(Int{}, Int{}))); + + using SmemLayoutAtomKV = decltype( + composition(Swizzle{}, + Layout, Int>, + Stride, _1>>{})); + using SmemLayoutKV = decltype(tile_to_shape( + // SmemLayoutAtomQdO{}, + SmemLayoutAtomKV{}, make_shape(Int{}, Int{}))); + + using SmemLayoutAtomKtransposedNoSwizzle = + Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomKtransposed = decltype(composition( + Swizzle{}, SmemLayoutAtomKtransposedNoSwizzle{})); + using SmemLayoutKtransposed = + decltype(tile_to_shape(SmemLayoutAtomKtransposed{}, + make_shape(Int{}, Int{}))); + // Maybe the KtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? + using SmemLayoutKtransposedNoSwizzle = + decltype(tile_to_shape(SmemLayoutAtomKtransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutKtransposedNoSwizzle = + // decltype(SmemLayoutKtransposed{}.layout_fn()); + + // TODO: generalize to other values of kBlockN + // TODO: what should be the Swizzle here? 3 is faster than 1, and 1 is faster + // than 2 static constexpr int kPBlockN = kBlockN; + static_assert(kBlockN >= 64); + // TD [2023-03-19]: Idk why kPBlockN = 16 and kSwizzlePdS=3 is the fastest. + static constexpr int kPBlockN = 64; + static_assert(kPBlockN == 16 || kPBlockN == 32 || kPBlockN == 64); + // static constexpr int kSwizzlePdS = kPBlockN == 16 ? 1 : (kPBlockN == 32 ? 
2 + // : 3); + static constexpr int kSwizzlePdS = 3; + using SmemLayoutAtomPdS = decltype(composition( + Swizzle{}, + Layout, Int>, Stride, _1>>{})); + using SmemLayoutPdS = decltype(tile_to_shape( + SmemLayoutAtomPdS{}, make_shape(Int{}, Int{}))); + using SmemLayoutAtomPdStransposedNoSwizzle = + Layout, Int>, Stride<_1, Int>>; + using SmemLayoutAtomPdStransposed = decltype(composition( + Swizzle{}, SmemLayoutAtomPdStransposedNoSwizzle{})); + using SmemLayoutPdStransposed = + decltype(tile_to_shape(SmemLayoutAtomPdStransposed{}, + make_shape(Int{}, Int{}))); + using SmemLayoutPdStransposedNoSwizzle = + decltype(tile_to_shape(SmemLayoutAtomPdStransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutPdStransposedNoSwizzle = + // decltype(SmemLayoutPdStransposed{}.layout_fn()); + using SmemCopyAtomPdS = Copy_Atom; + + using SmemLayoutAtomQdOtransposedNoSwizzle = + Layout, Int>, + Stride<_1, Int>>; + using SmemLayoutAtomQdOtransposed = decltype(composition( + Swizzle{}, SmemLayoutAtomQdOtransposedNoSwizzle{})); + using SmemLayoutQdOtransposed = + decltype(tile_to_shape(SmemLayoutAtomQdOtransposed{}, + make_shape(Int{}, Int{}))); + using SmemLayoutQdOtransposedNoSwizzle = + decltype(tile_to_shape(SmemLayoutAtomQdOtransposedNoSwizzle{}, + make_shape(Int{}, Int{}))); + // using SmemLayoutQdOtransposedNoSwizzle = + // decltype(SmemLayoutQdOtransposed{}.layout_fn()); + + using SmemLayoutAtomdKV = decltype(composition( + Swizzle{}, + Layout>, Stride, _1>>{})); + using SmemLayoutdKV = decltype(tile_to_shape( + SmemLayoutAtomdKV{}, make_shape(Int{}, Int{}))); + using SmemCopyAtomdKV = Copy_Atom; + + using SmemLayoutAtomdQ = decltype(composition( + Swizzle{}, + Layout>, Stride, _1>>{})); + using SmemLayoutdQ = decltype(tile_to_shape( + SmemLayoutAtomdQ{}, make_shape(Int{}, Int{}))); + using SmemCopyAtomdQ = Copy_Atom; + + static constexpr int kSmemQdOCount = + size(SmemLayoutQdO{}) * + (No_double_buffer ? 2 : 3); // Double buffer for sQ + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemdSCount = size(SmemLayoutPdS{}); + static constexpr int kSmemPCount = size(SmemLayoutPdS{}); + static constexpr int kSmemdQCount = size(SmemLayoutdQ{}); + static constexpr int kSmemQdOSize = kSmemQdOCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemdSSize = kSmemdSCount * sizeof(Element); + static constexpr int kSmemPSize = kSmemPCount * sizeof(Element); + static constexpr int kSmemdQSize = kSmemdQCount * sizeof(Element); + static constexpr int kSmemSize = + kSmemQdOSize + + (!Is_V_in_regs ? kSmemKVSize + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize) - : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize))); - static constexpr int kSmemSize1colblock = kSmemQdOSize - + (!Is_V_in_regs + : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + + std::max(kSmemPSize, kSmemdQSize))); + static constexpr int kSmemSize1colblock = + kSmemQdOSize + + (!Is_V_in_regs ? 
kSmemKVSize + kSmemdSSize + kSmemPSize : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + kSmemPSize)); - static constexpr int kSmemSize1rowblock = kSmemQdOSize / 3 * 2 + kSmemKVSize / 2 * 3 - + kSmemdSSize + kSmemPSize; - - - static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem instead of kHeadDim here to avoid bank conflicts, but doesn't seem - // to affect speed in practice. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading - // from the same address by the same threadblock. This is slightly faster. - using Gmem_copy_struct = std::conditional_t< - Has_cp_async, - SM80_CP_ASYNC_CACHEGLOBAL, - DefaultCopy - >; - using GmemTiledCopyQKV = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopydO = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydKV = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydQ = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemLayoutAtomdQaccum = std::conditional_t< - kBlockKSmem == 32, - Layout, // Thread layout, 8 threads per row - Stride< _8, _1>>, - Layout, // Thread layout, 16 threads per row - Stride< _16, _1>> - >; - using GmemTiledCopydQaccum = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtomdQaccum{}, - Layout>{})); // Val layout, 4 vals per store - - using GmemTiledCopydQaccumAtomicAdd = decltype( - make_tiled_copy(Copy_Atom{}, - Layout, // Thread layout, 8 threads per row - Stride<_32, _1>>{}, - Layout>{})); // Val layout, 1 val per store - + static constexpr int kSmemSize1rowblock = + kSmemQdOSize / 3 * 2 + kSmemKVSize / 2 * 3 + kSmemdSSize + kSmemPSize; + + static constexpr int kGmemElemsPerLoad = + sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, + "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem instead of kHeadDim here to avoid bank conflicts, but + // doesn't seem to affect speed in practice. + static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, + "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout< + Shape, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we + // won't be reading from the same address by the same threadblock. This is + // slightly faster. 
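// (Assumed PTX mapping behind that choice: SM80_CP_ASYNC_CACHEALWAYS lowers
// to cp.async.ca.shared.global, which allocates in both L1 and L2, while
// SM80_CP_ASYNC_CACHEGLOBAL lowers to cp.async.cg.shared.global, which
// caches in L2 only. Q/K/V tiles are streamed through once per threadblock,
// so they gain nothing from L1 residency and .cg avoids polluting it.)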
+ using Gmem_copy_struct = std::conditional_t< + Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; + using GmemTiledCopyQKV = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopydO = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydKV = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydQ = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemLayoutAtomdQaccum = std::conditional_t< + kBlockKSmem == 32, + Layout, // Thread layout, 8 threads per row + Stride<_8, _1>>, + Layout, // Thread layout, 16 threads per row + Stride<_16, _1>>>; + using GmemTiledCopydQaccum = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomdQaccum{}, + Layout>{})); // Val layout, 4 vals per store + + using GmemTiledCopydQaccumAtomicAdd = decltype( + make_tiled_copy(Copy_Atom{}, + Layout, // Thread layout, 8 threads per row + Stride<_32, _1>>{}, + Layout>{})); // Val layout, 1 val per store }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h index e07f38390..1c2fc9bab 100644 --- a/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h +++ b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h @@ -12,148 +12,154 @@ using namespace cute; -template +template struct Flash_kernel_traits_sm90 { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using Element = elem_type; - static constexpr bool Has_cp_async = true; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using Element = elem_type; + static constexpr bool Has_cp_async = true; #else - using Element = cutlass::half_t; - static constexpr bool Has_cp_async = false; + using Element = cutlass::half_t; + static constexpr bool Has_cp_async = false; #endif - using ElementAccum = float; - using index_t = uint32_t; + using ElementAccum = float; + using index_t = uint32_t; -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - using MMA_Atom_Arch = std::conditional_t< - std::is_same_v, - MMA_Atom, - MMA_Atom - >; - using ValLayoutMNK = Layout>; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + using MMA_Atom_Arch = + std::conditional_t, + MMA_Atom, + MMA_Atom>; + using ValLayoutMNK = Layout>; #else - using MMA_Atom_Arch = MMA_Atom; - using ValLayoutMNK = Layout>; + using MMA_Atom_Arch = MMA_Atom; + using ValLayoutMNK = Layout>; #endif -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750 + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; #else - using SmemCopyAtom = Copy_Atom; - using SmemCopyAtomTransposed = Copy_Atom; + using SmemCopyAtom = Copy_Atom; + using SmemCopyAtomTransposed = Copy_Atom; #endif }; -template > +template > struct Flash_fwd_kernel_traits : public Base { - using Element = typename Base::Element; - using ElementAccum = typename Base::ElementAccum; - using index_t = typename Base::index_t; - static constexpr bool Has_cp_async = Base::Has_cp_async; - using SmemCopyAtom = typename Base::SmemCopyAtom; - using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; - - static constexpr bool Share_Q_K_smem 
= Share_Q_K_smem_; - static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; - - // The number of threads. - static constexpr int kNWarps = kNWarps_; - static constexpr int kNThreads = kNWarps * 32; - - static constexpr int kBlockM = kBlockM_; - static constexpr int kBlockN = kBlockN_; - static constexpr int kHeadDim = kHeadDim_; - static_assert(kHeadDim % 32 == 0); - static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; - static constexpr int kBlockKGmem = kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); - static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; - - using TiledMma = TiledMMA< - typename Base::MMA_Atom_Arch, - Layout,_1,_1>>, // 4x1x1 or 8x1x1 thread group - typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for 16x16x16 MMA and LDSM - - using SmemLayoutAtomQ = decltype( - composition(Swizzle{}, - // This has to be kBlockKSmem, using kHeadDim gives wrong results for d=128 - Layout>, - Stride, _1>>{})); - using SmemLayoutQ = decltype(tile_to_shape( - SmemLayoutAtomQ{}, - Shape, Int>{})); - - using SmemLayoutKV = decltype(tile_to_shape( - SmemLayoutAtomQ{}, - Shape, Int>{})); - - using SmemLayoutAtomVtransposed = decltype( - composition(Swizzle{}, - // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 - Layout, Int>, - Stride<_1, Int>>{})); - using SmemLayoutVtransposed = decltype(tile_to_shape( - SmemLayoutAtomVtransposed{}, - Shape, Int>{})); - // Maybe the VtransposeNoSwizzle just needs to have the right shape - // And the strides don't matter? - using SmemLayoutVtransposedNoSwizzle = decltype(SmemLayoutVtransposed{}.layout_fn()); - - using SmemLayoutAtomO = decltype( - composition(Swizzle{}, - Layout, Int>, - Stride, _1>>{})); - using SmemLayoutO = decltype(tile_to_shape( - SmemLayoutAtomO{}, - Shape, Int>{})); - using SmemCopyAtomO = Copy_Atom; - - static constexpr int kSmemQCount = size(SmemLayoutQ{}); - static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; - static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); - static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); - static constexpr int kSmemSize = Share_Q_K_smem ? std::max(kSmemQSize, kSmemKVSize) : kSmemQSize + kSmemKVSize; - - static constexpr int kGmemElemsPerLoad = sizeof(cute::uint128_t) / sizeof(Element); - static_assert(kHeadDim % kGmemElemsPerLoad == 0, "kHeadDim must be a multiple of kGmemElemsPerLoad"); - // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because of bank conflicts. - // For example, for d=128, smem is split into 2 "pages", each page takes care of columns - // 0-63 and 64-127. If we have 16 threads per row for gmem read, when we write to smem, - // thread 0 - 7 will write to the first page and thread 8 - 15 will write to the second page, - // to the same banks. - static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRow == 0, "kNThreads must be a multiple of kGmemThreadsPerRow"); - using GmemLayoutAtom = Layout, Int>, - Stride, _1>>; - - // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we won't be reading - // from the same address by the same threadblock. This is slightly faster. 
- using Gmem_copy_struct = std::conditional_t< - Has_cp_async, - SM80_CP_ASYNC_CACHEGLOBAL, - DefaultCopy - >; - using GmemTiledCopyQKV = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; - static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); - using GmemLayoutAtomP = Layout, Int>, - Stride, _1>>; - - using GmemTiledCopyP = decltype( - make_tiled_copy(Copy_Atom{}, - GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store - + using Element = typename Base::Element; + using ElementAccum = typename Base::ElementAccum; + using index_t = typename Base::index_t; + static constexpr bool Has_cp_async = Base::Has_cp_async; + using SmemCopyAtom = typename Base::SmemCopyAtom; + using SmemCopyAtomTransposed = typename Base::SmemCopyAtomTransposed; + + static constexpr bool Share_Q_K_smem = Share_Q_K_smem_; + static constexpr bool Is_Q_in_regs = Is_Q_in_regs_ || Share_Q_K_smem; + + // The number of threads. + static constexpr int kNWarps = kNWarps_; + static constexpr int kNThreads = kNWarps * 32; + + static constexpr int kBlockM = kBlockM_; + static constexpr int kBlockN = kBlockN_; + static constexpr int kHeadDim = kHeadDim_; + static_assert(kHeadDim % 32 == 0); + static constexpr int kBlockKSmem = kHeadDim % 64 == 0 ? 64 : 32; + static constexpr int kBlockKGmem = + kHeadDim % 128 == 0 ? 128 : (kHeadDim % 64 == 0 ? 64 : 32); + static constexpr int kSwizzle = kBlockKSmem == 32 ? 2 : 3; + + using TiledMma = + TiledMMA, _1, _1>>, // 4x1x1 or 8x1x1 thread + // group + typename Base::ValLayoutMNK>; // 1x2x1 or 1x2x2 value group for + // 16x16x16 MMA and LDSM + + using SmemLayoutAtomQ = decltype(composition( + Swizzle{}, + // This has to be kBlockKSmem, using kHeadDim gives wrong results for + // d=128 + Layout>, Stride, _1>>{})); + using SmemLayoutQ = decltype( + tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + + using SmemLayoutKV = decltype( + tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + + using SmemLayoutAtomVtransposed = + decltype(composition(Swizzle{}, + // This has to be kBlockN and not 8, otherwise we get + // wrong results for d=128 + Layout, Int>, + Stride<_1, Int>>{})); + using SmemLayoutVtransposed = decltype(tile_to_shape( + SmemLayoutAtomVtransposed{}, Shape, Int>{})); + // Maybe the VtransposeNoSwizzle just needs to have the right shape + // And the strides don't matter? + using SmemLayoutVtransposedNoSwizzle = + decltype(SmemLayoutVtransposed{}.layout_fn()); + + using SmemLayoutAtomO = decltype(composition( + Swizzle{}, + Layout, Int>, Stride, _1>>{})); + using SmemLayoutO = decltype( + tile_to_shape(SmemLayoutAtomO{}, Shape, Int>{})); + using SmemCopyAtomO = Copy_Atom; + + static constexpr int kSmemQCount = size(SmemLayoutQ{}); + static constexpr int kSmemKVCount = size(SmemLayoutKV{}) * 2; + static constexpr int kSmemQSize = kSmemQCount * sizeof(Element); + static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element); + static constexpr int kSmemSize = Share_Q_K_smem + ? 
std::max(kSmemQSize, kSmemKVSize) + : kSmemQSize + kSmemKVSize; + + static constexpr int kGmemElemsPerLoad = + sizeof(cute::uint128_t) / sizeof(Element); + static_assert(kHeadDim % kGmemElemsPerLoad == 0, + "kHeadDim must be a multiple of kGmemElemsPerLoad"); + // Using kBlockKSmem here is 6-10% faster than kBlockKGmem for d=128 because + // of bank conflicts. For example, for d=128, smem is split into 2 "pages", + // each page takes care of columns 0-63 and 64-127. If we have 16 threads per + // row for gmem read, when we write to smem, thread 0 - 7 will write to the + // first page and thread 8 - 15 will write to the second page, to the same + // banks. + static constexpr int kGmemThreadsPerRow = kBlockKSmem / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRow == 0, + "kNThreads must be a multiple of kGmemThreadsPerRow"); + using GmemLayoutAtom = Layout< + Shape, Int>, + Stride, _1>>; + + // We use CACHEGLOBAL instead of CACHEALWAYS for both Q and K/V, since we + // won't be reading from the same address by the same threadblock. This is + // slightly faster. + using Gmem_copy_struct = std::conditional_t< + Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; + using GmemTiledCopyQKV = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopyO = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; + static_assert(kNThreads % kGmemThreadsPerRowP == 0, + "kNThreads must be a multiple of kGmemThreadsPerRowP"); + using GmemLayoutAtomP = Layout< + Shape, Int>, + Stride, _1>>; + + using GmemTiledCopyP = decltype( + make_tiled_copy(Copy_Atom{}, GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/external_libs/runtime/flash_attn/lib/softmax.h b/external_libs/runtime/flash_attn/lib/softmax.h index 09a93f145..5b8368609 100644 --- a/external_libs/runtime/flash_attn/lib/softmax.h +++ b/external_libs/runtime/flash_attn/lib/softmax.h @@ -19,265 +19,323 @@ using namespace cute; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -__device__ inline void thread_reduce_(Tensor const &tensor, Tensor &summary, Operator &op) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(summary) == size<0>(tensor)); - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); mi++) { - summary(mi) = zero_init ? tensor(mi, 0) : op(summary(mi), tensor(mi, 0)); - #pragma unroll - for (int ni = 1; ni < size<1>(tensor); ni++) { - summary(mi) = op(summary(mi), tensor(mi, ni)); - } +template +__device__ inline void thread_reduce_(Tensor const &tensor, + Tensor &summary, + Operator &op) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(summary) == size<0>(tensor)); +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); mi++) { + summary(mi) = zero_init ? 
tensor(mi, 0) : op(summary(mi), tensor(mi, 0)); +#pragma unroll + for (int ni = 1; ni < size<1>(tensor); ni++) { + summary(mi) = op(summary(mi), tensor(mi, ni)); } + } } -template -__device__ inline void quad_allreduce_(Tensor &dst, Tensor &src, Operator &op) { - CUTE_STATIC_ASSERT_V(size(dst) == size(src)); - #pragma unroll - for (int i = 0; i < size(dst); i++){ - dst(i) = Allreduce<4>::run(src(i), op); - } +template +__device__ inline void quad_allreduce_(Tensor &dst, + Tensor &src, + Operator &op) { + CUTE_STATIC_ASSERT_V(size(dst) == size(src)); +#pragma unroll + for (int i = 0; i < size(dst); i++) { + dst(i) = Allreduce<4>::run(src(i), op); + } } -template -__device__ inline void reduce_(Tensor const& tensor, Tensor &summary, Operator &op) { - thread_reduce_(tensor, summary, op); - quad_allreduce_(summary, summary, op); +template +__device__ inline void reduce_(Tensor const &tensor, + Tensor &summary, + Operator &op) { + thread_reduce_(tensor, summary, op); + quad_allreduce_(summary, summary, op); } -template -__device__ inline void reduce_max(Tensor const& tensor, Tensor &max){ - MaxOp max_op; - reduce_(tensor, max, max_op); +template +__device__ inline void reduce_max(Tensor const &tensor, + Tensor &max) { + MaxOp max_op; + reduce_(tensor, max, max_op); } -template -__device__ inline void reduce_sum(Tensor const& tensor, Tensor &sum){ - SumOp sum_op; - reduce_(tensor, sum, sum_op); +template +__device__ inline void reduce_sum(Tensor const &tensor, + Tensor &sum) { + SumOp sum_op; + reduce_(tensor, sum, sum_op); } // Apply the exp to all the elements. -template -inline __device__ void scale_apply_exp2(Tensor &tensor, Tensor const &max, const float scale) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - // If max is -inf, then all elements must have been -inf (possibly due to masking). - // We don't want (-inf - (-inf)) since that would give NaN. - // If we don't have float around M_LOG2E the multiplication is done in fp64. - const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * (Scale_max ? scale : float(M_LOG2E)); - #pragma unroll - for (int ni = 0; ni < size<1>(tensor); ++ni) { - // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - - // max * log_2(e)) This allows the compiler to use the ffma - // instruction instead of fadd and fmul separately. - tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); - } +template +inline __device__ void scale_apply_exp2(Tensor &tensor, + Tensor const &max, + const float scale) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + // If max is -inf, then all elements must have been -inf (possibly due to + // masking). We don't want (-inf - (-inf)) since that would give NaN. If we + // don't have float around M_LOG2E the multiplication is done in fp64. + const float max_scaled = + max(mi) == -INFINITY ? 0.f + : max(mi) * (Scale_max ? scale : float(M_LOG2E)); +#pragma unroll + for (int ni = 0; ni < size<1>(tensor); ++ni) { + // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - + // max * log_2(e)) This allows the compiler to use the ffma + // instruction instead of fadd and fmul separately. 
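// (Concretely: exp(x - m) = exp2(x * log2(e) - m * log2(e)); since `scale`
// already carries the log2(e) factor and m * scale is precomputed as
// `max_scaled`, the expression below is one fused multiply-add feeding
// exp2f, i.e. ffma + ex2, rather than a separate fadd and fmul before expf.)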
+ tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); } + } } // Apply the exp to all the elements. -template -inline __device__ void max_scale_exp2_sum(Tensor &tensor, Tensor &max, Tensor &sum, const float scale) { - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 1, "Only support 1D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - MaxOp max_op; - max(mi) = zero_init ? tensor(mi, 0) : max_op(max(mi), tensor(mi, 0)); - #pragma unroll - for (int ni = 1; ni < size<1>(tensor); ni++) { - max(mi) = max_op(max(mi), tensor(mi, ni)); - } - max(mi) = Allreduce<4>::run(max(mi), max_op); - // If max is -inf, then all elements must have been -inf (possibly due to masking). - // We don't want (-inf - (-inf)) since that would give NaN. - const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale; - sum(mi) = 0; - #pragma unroll - for (int ni = 0; ni < size<1>(tensor); ++ni) { - // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - - // max * log_2(e)) This allows the compiler to use the ffma - // instruction instead of fadd and fmul separately. - tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); - sum(mi) += tensor(mi, ni); - } - SumOp sum_op; - sum(mi) = Allreduce<4>::run(sum(mi), sum_op); +template +inline __device__ void max_scale_exp2_sum(Tensor &tensor, + Tensor &max, + Tensor &sum, + const float scale) { + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 1, "Only support 1D Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(max) == size<0>(tensor)); +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + MaxOp max_op; + max(mi) = zero_init ? tensor(mi, 0) : max_op(max(mi), tensor(mi, 0)); +#pragma unroll + for (int ni = 1; ni < size<1>(tensor); ni++) { + max(mi) = max_op(max(mi), tensor(mi, ni)); + } + max(mi) = Allreduce<4>::run(max(mi), max_op); + // If max is -inf, then all elements must have been -inf (possibly due to + // masking). We don't want (-inf - (-inf)) since that would give NaN. + const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale; + sum(mi) = 0; +#pragma unroll + for (int ni = 0; ni < size<1>(tensor); ++ni) { + // Instead of computing exp(x - max), we compute exp2(x * log_2(e) - + // max * log_2(e)) This allows the compiler to use the ffma + // instruction instead of fadd and fmul separately. 
+ tensor(mi, ni) = exp2f(tensor(mi, ni) * scale - max_scaled); + sum(mi) += tensor(mi, ni); } + SumOp sum_op; + sum(mi) = Allreduce<4>::run(sum(mi), sum_op); + } } template -inline __device__ void apply_mask(Tensor &tensor, const int max_seqlen_k, +inline __device__ void apply_mask(Tensor &tensor, + const int max_seqlen_k, const int col_idx_offset_ = 0) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout::rank == 2, "Only support 2D Tensor"); - const int lane_id = threadIdx.x % 32; - const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; - #pragma unroll - for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { - const int col_idx_base = col_idx_offset + nj * 8; - #pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const int col_idx = col_idx_base + j; - if (col_idx >= max_seqlen_k) { - // Without the "make_coord" we get wrong results - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - tensor(mi, make_coord(j, nj)) = -INFINITY; - } - } + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout::rank == 2, "Only support 2D Tensor"); + const int lane_id = threadIdx.x % 32; + const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; +#pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; +#pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; + if (col_idx >= max_seqlen_k) { +// Without the "make_coord" we get wrong results +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + tensor(mi, make_coord(j, nj)) = -INFINITY; } + } } + } } -template -inline __device__ void apply_mask_local(Tensor &tensor, const int col_idx_offset_, - const int max_seqlen_k, const int row_idx_offset, - const int max_seqlen_q, const int warp_row_stride, - const int window_size_left, const int window_size_right) { - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout::rank == 2, "Only support 2D Tensor"); - const int lane_id = threadIdx.x % 32; - const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; - #pragma unroll - for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { - const int row_idx_base = row_idx_offset + mi * warp_row_stride; - #pragma unroll - for (int i = 0; i < size<0, 0>(tensor); ++i) { - const int row_idx = row_idx_base + i * 8; - const int col_idx_limit_left = std::max(0, row_idx + max_seqlen_k - max_seqlen_q - window_size_left); - const int col_idx_limit_right = std::min(max_seqlen_k, row_idx + 1 + max_seqlen_k - max_seqlen_q + window_size_right); - #pragma unroll - for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { - const int col_idx_base = col_idx_offset + nj * 8; - #pragma unroll - for (int j = 0; j < size<1, 0>(tensor); ++j) { - const int col_idx = col_idx_base + j; - if (col_idx >= col_idx_limit_right || (HasWSLeft && col_idx < col_idx_limit_left)) { - tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY; - } - } - } - // if (cute::thread0()) { - // printf("mi = %d, i = %d, row_idx = %d, max_seqlen_k = %d\n", mi, i, row_idx, max_seqlen_k); - // print(tensor(make_coord(i, mi), _)); - // // print(tensor(_, j + nj * size<1, 0>(tensor))); - // } +template +inline __device__ void +apply_mask_local(Tensor &tensor, const int col_idx_offset_, + const int max_seqlen_k, const int row_idx_offset, + const int max_seqlen_q, const int warp_row_stride, + const int window_size_left, const int window_size_right) { + // tensor has shape (ncol=(2, MMA_M), nrow=(2, 
MMA_N)) + static_assert(Layout::rank == 2, "Only support 2D Tensor"); + const int lane_id = threadIdx.x % 32; + const int col_idx_offset = col_idx_offset_ + (lane_id % 4) * 2; +#pragma unroll + for (int mi = 0; mi < size<0, 1>(tensor); ++mi) { + const int row_idx_base = row_idx_offset + mi * warp_row_stride; +#pragma unroll + for (int i = 0; i < size<0, 0>(tensor); ++i) { + const int row_idx = row_idx_base + i * 8; + const int col_idx_limit_left = + std::max(0, row_idx + max_seqlen_k - max_seqlen_q - window_size_left); + const int col_idx_limit_right = + std::min(max_seqlen_k, row_idx + 1 + max_seqlen_k - max_seqlen_q + + window_size_right); +#pragma unroll + for (int nj = 0; nj < size<1, 1>(tensor); ++nj) { + const int col_idx_base = col_idx_offset + nj * 8; +#pragma unroll + for (int j = 0; j < size<1, 0>(tensor); ++j) { + const int col_idx = col_idx_base + j; + if (col_idx >= col_idx_limit_right || + (HasWSLeft && col_idx < col_idx_limit_left)) { + tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY; + } } + } + // if (cute::thread0()) { + // printf("mi = %d, i = %d, row_idx = %d, max_seqlen_k = %d\n", mi, i, + // row_idx, max_seqlen_k); print(tensor(make_coord(i, mi), _)); + // // print(tensor(_, j + nj * size<1, 0>(tensor))); + // } } + } } template -inline __device__ void apply_mask_causal(Tensor &tensor, const int col_idx_offset_, - const int max_seqlen_k, const int row_idx_offset, - const int max_seqlen_q, const int warp_row_stride) { - // Causal masking is equivalent to local masking with window_size_left = infinity and window_size_right = 0 - apply_mask_local(tensor, col_idx_offset_, max_seqlen_k, row_idx_offset, - max_seqlen_q, warp_row_stride, -1, 0); +inline __device__ void +apply_mask_causal(Tensor &tensor, const int col_idx_offset_, + const int max_seqlen_k, const int row_idx_offset, + const int max_seqlen_q, const int warp_row_stride) { + // Causal masking is equivalent to local masking with window_size_left = + // infinity and window_size_right = 0 + apply_mask_local(tensor, col_idx_offset_, max_seqlen_k, + row_idx_offset, max_seqlen_q, + warp_row_stride, -1, 0); } -template -inline __device__ void apply_mask_causal_w_idx( - Tensor &tensor, Tensor const &idx_rowcol, - const int col_idx_offset_, const int max_seqlen_k, const int row_idx_offset) -{ - // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) - static_assert(Layout0::rank == 2, "Only support 2D Tensor"); - static_assert(Layout1::rank == 2, "Only support 2D Tensor"); - CUTE_STATIC_ASSERT_V(size<0>(tensor) == size<0>(idx_rowcol)); - CUTE_STATIC_ASSERT_V(size<1>(tensor) == size<1>(idx_rowcol)); - #pragma unroll - for (int mi = 0; mi < size<0>(tensor); ++mi) { - const int col_idx_limit = std::min(max_seqlen_k, 1 + row_idx_offset + get<0>(idx_rowcol(mi, 0))); - #pragma unroll - for (int ni = 0; ni < size<1, 1>(tensor); ++ni) { - if (col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) { - tensor(mi, ni) = -INFINITY; - } - } - // if (cute::thread0()) { - // printf("ni = %d, j = %d, col_idx = %d, max_seqlen_k = %d\n", ni, j, col_idx, max_seqlen_k); - // print(tensor(_, make_coord(j, ni))); - // // print(tensor(_, j + ni * size<1, 0>(tensor))); - // } +template +inline __device__ void +apply_mask_causal_w_idx(Tensor &tensor, + Tensor const &idx_rowcol, + const int col_idx_offset_, const int max_seqlen_k, + const int row_idx_offset) { + // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N)) + static_assert(Layout0::rank == 2, "Only support 2D Tensor"); + static_assert(Layout1::rank == 2, "Only support 2D 
Tensor"); + CUTE_STATIC_ASSERT_V(size<0>(tensor) == size<0>(idx_rowcol)); + CUTE_STATIC_ASSERT_V(size<1>(tensor) == size<1>(idx_rowcol)); +#pragma unroll + for (int mi = 0; mi < size<0>(tensor); ++mi) { + const int col_idx_limit = + std::min(max_seqlen_k, 1 + row_idx_offset + get<0>(idx_rowcol(mi, 0))); +#pragma unroll + for (int ni = 0; ni < size<1, 1>(tensor); ++ni) { + if (col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) { + tensor(mi, ni) = -INFINITY; + } } + // if (cute::thread0()) { + // printf("ni = %d, j = %d, col_idx = %d, max_seqlen_k = %d\n", ni, j, + // col_idx, max_seqlen_k); print(tensor(_, make_coord(j, ni))); + // // print(tensor(_, j + ni * size<1, 0>(tensor))); + // } + } } -template -inline __device__ void apply_dropout(Tensor &tensor, uint8_t p_dropout_in_uint8_t, - unsigned long long seed, unsigned long long offset, - int block_row_start, int block_col_start, - int block_row_stride) { - // tensor has shape (8, MMA_M, MMA_N / 2) - using T = typename Engine::value_type; - auto encode_dropout = [](bool keep, T val) { - return keep ? val : (encode_dropout_in_sign_bit ? -val : T(0)); - }; - static_assert(decltype(size<2>(tensor))::value % 2 == 0); - const uint16_t p_dropout_8bit_in_uint16_t = uint16_t(p_dropout_in_uint8_t); - const uint32_t p_dropout_8bit_in_uint32_t = (uint32_t(p_dropout_8bit_in_uint16_t) << 16) | uint32_t(p_dropout_8bit_in_uint16_t); - // if (cute::thread0()) { printf("threshold2 = 0x%x\n", p_dropout_8bit_in_uint32_t); } - #pragma unroll - for (int m = 0; m < size<1>(tensor); ++m, block_row_start += block_row_stride) { - uint2 rowcol = make_uint2(block_row_start, block_col_start); - #pragma unroll - for (int n = 0; n < size<2>(tensor) / 2; ++n, ++rowcol.y) { - // if (cute::thread(32, 0)) { printf("m = %d, n = %d, row = %d, col = %d\n", m, n, int(rowcol.x), int(rowcol.y));} - uint4 random_uint4 = flash::philox(seed, reinterpret_cast(rowcol), offset); - // if (cute::thread0()) { printf("philox = %u, %d, %d, %d\n", random_uint4.x, random_uint4.y, random_uint4.z, random_uint4.w);} - uint8_t (&rnd_8)[16] = reinterpret_cast(random_uint4); - // Special implementation for 16-bit types: we duplicate the threshold to the - // low and high 16 bits of a 32-bit value, then use the f16x2 comparison instruction - // to get a mask. The low 16 bits of the mask will be either 0xffff or 0x0000, - // and the high 16 bits will be either 0xffff or 0x0000, depending on whether - // the random value is less than the threshold. - // We then do a bit-wise AND between the mask and the original value (in 32-bit). - // We're exploiting the fact that floating point comparison is equivalent to integer - // comparison, since we're comparing unsigned integers whose top 8-bits are zero. 
- if (!encode_dropout_in_sign_bit - && (std::is_same::value || std::is_same::value)) { - uint16_t rnd_16[16]; - #pragma unroll - for (int i = 0; i < 16; i++) { rnd_16[i] = uint16_t(rnd_8[i]); } - uint32_t (&rnd_32)[8] = reinterpret_cast(rnd_16); - #pragma unroll - for (int j = 0; j < 2; j++) { - Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); - // if (cute::thread0()) { printf("random = 0x%x, 0x%x, 0x%x, 0x%x\n", rnd_32[j * 4 + 0], rnd_32[j * 4 + 1], rnd_32[j * 4 + 2], rnd_32[j * 4 + 3]); } - // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); } - #pragma unroll - for (int i = 0; i < 4; i++) { - uint32_t mask; - asm volatile("set.le.u32.f16x2 %0, %1, %2;\n" : "=r"(mask) : "r"(rnd_32[j * 4 + i]), "r"(p_dropout_8bit_in_uint32_t)); - tensor_uint32(i) &= mask; - } - // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); } - } - } else { - #pragma unroll - for (int j = 0; j < 2; j++) { - #pragma unroll - for (int i = 0; i < 8; i++) { - tensor(i, m, n * 2 + j) = encode_dropout(rnd_8[j * 8 + i] <= p_dropout_in_uint8_t, tensor(i, m, n * 2 + j)); - } - Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); - // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); } - } - } - // // if ((threadIdx.x == 0) && (blockIdx.x == 0) && (blockIdx.y == 0)) { - // // printf("n = %d, ph Philox: %u, %u, %u, %u\n", n, rnd_8.x, rnd_8.y, rnd_8.z, rnd_8.w); - // // } +template +inline __device__ void +apply_dropout(Tensor &tensor, uint8_t p_dropout_in_uint8_t, + unsigned long long seed, unsigned long long offset, + int block_row_start, int block_col_start, int block_row_stride) { + // tensor has shape (8, MMA_M, MMA_N / 2) + using T = typename Engine::value_type; + auto encode_dropout = [](bool keep, T val) { + return keep ? val : (encode_dropout_in_sign_bit ? -val : T(0)); + }; + static_assert(decltype(size<2>(tensor))::value % 2 == 0); + const uint16_t p_dropout_8bit_in_uint16_t = uint16_t(p_dropout_in_uint8_t); + const uint32_t p_dropout_8bit_in_uint32_t = + (uint32_t(p_dropout_8bit_in_uint16_t) << 16) | + uint32_t(p_dropout_8bit_in_uint16_t); +// if (cute::thread0()) { printf("threshold2 = 0x%x\n", +// p_dropout_8bit_in_uint32_t); } +#pragma unroll + for (int m = 0; m < size<1>(tensor); + ++m, block_row_start += block_row_stride) { + uint2 rowcol = make_uint2(block_row_start, block_col_start); +#pragma unroll + for (int n = 0; n < size<2>(tensor) / 2; ++n, ++rowcol.y) { + // if (cute::thread(32, 0)) { printf("m = %d, n = %d, row = %d, col = + // %d\n", m, n, int(rowcol.x), int(rowcol.y));} + uint4 random_uint4 = flash::philox( + seed, reinterpret_cast(rowcol), offset); + // if (cute::thread0()) { printf("philox = %u, %d, %d, %d\n", + // random_uint4.x, random_uint4.y, random_uint4.z, random_uint4.w);} + uint8_t(&rnd_8)[16] = reinterpret_cast(random_uint4); + // Special implementation for 16-bit types: we duplicate the threshold to + // the low and high 16 bits of a 32-bit value, then use the f16x2 + // comparison instruction to get a mask. The low 16 bits of the mask will + // be either 0xffff or 0x0000, and the high 16 bits will be either 0xffff + // or 0x0000, depending on whether the random value is less than the + // threshold. We then do a bit-wise AND between the mask and the original + // value (in 32-bit). 
We're exploiting the fact that floating point + // comparison is equivalent to integer comparison, since we're comparing + // unsigned integers whose top 8-bits are zero. + if (!encode_dropout_in_sign_bit && + (std::is_same::value || + std::is_same::value)) { + uint16_t rnd_16[16]; +#pragma unroll + for (int i = 0; i < 16; i++) { + rnd_16[i] = uint16_t(rnd_8[i]); + } + uint32_t(&rnd_32)[8] = reinterpret_cast(rnd_16); +#pragma unroll + for (int j = 0; j < 2; j++) { + Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); +// if (cute::thread0()) { printf("random = 0x%x, 0x%x, 0x%x, 0x%x\n", rnd_32[j * +// 4 + 0], rnd_32[j * 4 + 1], rnd_32[j * 4 + 2], rnd_32[j * 4 + 3]); } if +// (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, 0x%x\n", +// tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), tensor_uint32(3)); } +#pragma unroll + for (int i = 0; i < 4; i++) { + uint32_t mask; + asm volatile("set.le.u32.f16x2 %0, %1, %2;\n" + : "=r"(mask) + : "r"(rnd_32[j * 4 + i]), + "r"(p_dropout_8bit_in_uint32_t)); + tensor_uint32(i) &= mask; + } + // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, + // 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), + // tensor_uint32(3)); } + } + } else { +#pragma unroll + for (int j = 0; j < 2; j++) { +#pragma unroll + for (int i = 0; i < 8; i++) { + tensor(i, m, n * 2 + j) = + encode_dropout(rnd_8[j * 8 + i] <= p_dropout_in_uint8_t, + tensor(i, m, n * 2 + j)); + } + Tensor tensor_uint32 = recast(tensor(_, m, n * 2 + j)); + // if (cute::thread0()) { printf("tensor_uint32 = 0x%x, 0x%x, 0x%x, + // 0x%x\n", tensor_uint32(0), tensor_uint32(1), tensor_uint32(2), + // tensor_uint32(3)); } } + } + // // if ((threadIdx.x == 0) && (blockIdx.x == 0) && (blockIdx.y == 0)) { + // // printf("n = %d, ph Philox: %u, %u, %u, %u\n", n, rnd_8.x, + // rnd_8.y, rnd_8.z, rnd_8.w); + // // } } + } } -} // namespace flash +} // namespace flash diff --git a/external_libs/runtime/flash_attn/lib/utils.h b/external_libs/runtime/flash_attn/lib/utils.h index edf6a60a7..64de735a7 100644 --- a/external_libs/runtime/flash_attn/lib/utils.h +++ b/external_libs/runtime/flash_attn/lib/utils.h @@ -28,33 +28,33 @@ namespace flash { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ uint32_t relu2(const uint32_t x); +template inline __device__ uint32_t relu2(const uint32_t x); -template<> +template <> inline __device__ uint32_t relu2(const uint32_t x) { - uint32_t res; - const uint32_t zero = 0u; + uint32_t res; + const uint32_t zero = 0u; #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - asm volatile("max.f16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); + asm volatile("max.f16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); #else - asm volatile( \ - "{\n" \ - "\t .reg .f16x2 sela;\n" \ - "\t set.gtu.u32.f16x2 sela, %1, %2;\n" \ - "\t and.b32 %0, sela, %1;\n" - "}\n" : "=r"(res) : "r"(x), "r"(zero)); + asm volatile("{\n" + "\t .reg .f16x2 sela;\n" + "\t set.gtu.u32.f16x2 sela, %1, %2;\n" + "\t and.b32 %0, sela, %1;\n" + "}\n" + : "=r"(res) + : "r"(x), "r"(zero)); #endif - return res; + return res; } #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 -template<> +template <> inline __device__ uint32_t relu2(const uint32_t x) { - uint32_t res; - const uint32_t zero = 0u; - asm volatile("max.bf16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), "r"(zero)); - return res; + uint32_t res; + const uint32_t zero = 0u; + asm volatile("max.bf16x2 %0, %1, %2;\n" : "=r"(res) : "r"(x), 
"r"(zero)); + return res; } #endif @@ -62,460 +62,513 @@ inline __device__ uint32_t relu2(const uint32_t x) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 -template -inline __device__ uint32_t convert_relu2(const float2 x); +template inline __device__ uint32_t convert_relu2(const float2 x); -template<> +template <> inline __device__ uint32_t convert_relu2(const float2 x) { - uint32_t res; - const uint32_t a = reinterpret_cast(x.x); - const uint32_t b = reinterpret_cast(x.y); - asm volatile("cvt.rn.relu.f16x2.f32 %0, %1, %2;\n" : "=r"(res) : "r"(b), "r"(a)); - return res; + uint32_t res; + const uint32_t a = reinterpret_cast(x.x); + const uint32_t b = reinterpret_cast(x.y); + asm volatile("cvt.rn.relu.f16x2.f32 %0, %1, %2;\n" + : "=r"(res) + : "r"(b), "r"(a)); + return res; } -template<> +template <> inline __device__ uint32_t convert_relu2(const float2 x) { - uint32_t res; - const uint32_t a = reinterpret_cast(x.x); - const uint32_t b = reinterpret_cast(x.y); - asm volatile("cvt.rn.relu.bf16x2.f32 %0, %1, %2;\n" : "=r"(res) : "r"(b), "r"(a)); - return res; + uint32_t res; + const uint32_t a = reinterpret_cast(x.x); + const uint32_t b = reinterpret_cast(x.y); + asm volatile("cvt.rn.relu.bf16x2.f32 %0, %1, %2;\n" + : "=r"(res) + : "r"(b), "r"(a)); + return res; } #endif //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct MaxOp { -__device__ inline T operator()(T const & x, T const & y) { return x > y ? x : y; } +template struct MaxOp { + __device__ inline T operator()(T const &x, T const &y) { + return x > y ? x : y; + } }; -template <> -struct MaxOp { -// This is slightly faster -__device__ inline float operator()(float const &x, float const &y) { return max(x, y); } +template <> struct MaxOp { + // This is slightly faster + __device__ inline float operator()(float const &x, float const &y) { + return max(x, y); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct SumOp { -__device__ inline T operator()(T const & x, T const & y) { return x + y; } +template struct SumOp { + __device__ inline T operator()(T const &x, T const &y) { return x + y; } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Allreduce { - static_assert(THREADS == 32 || THREADS == 16 || THREADS == 8 || THREADS == 4); - template - static __device__ inline T run(T x, Operator &op) { - constexpr int OFFSET = THREADS / 2; - x = op(x, __shfl_xor_sync(uint32_t(-1), x, OFFSET)); - return Allreduce::run(x, op); - } +template struct Allreduce { + static_assert(THREADS == 32 || THREADS == 16 || THREADS == 8 || THREADS == 4); + template + static __device__ inline T run(T x, Operator &op) { + constexpr int OFFSET = THREADS / 2; + x = op(x, __shfl_xor_sync(uint32_t(-1), x, OFFSET)); + return Allreduce::run(x, op); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template<> -struct Allreduce<2> { -template -static __device__ inline T run(T x, Operator &op) { +template <> struct Allreduce<2> { + template + static __device__ inline T run(T x, Operator &op) { x = op(x, __shfl_xor_sync(uint32_t(-1), x, 1)); return x; -} + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void gemm(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const& tCsA, - Tensor4 const& tCsB, TiledMma 
tiled_mma, - TiledCopyA smem_tiled_copy_A, TiledCopyB smem_tiled_copy_B, - ThrCopyA smem_thr_copy_A, ThrCopyB smem_thr_copy_B) { - CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M - CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N - CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K - Tensor tCrA_copy_view = smem_thr_copy_A.retile_D(tCrA); - CUTE_STATIC_ASSERT_V(size<1>(tCsA) == size<1>(tCrA_copy_view)); // M - Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); - CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N - if (!A_in_regs) { cute::copy(smem_tiled_copy_A, tCsA(_, _, _0{}), tCrA_copy_view(_, _, _0{})); } - if (!B_in_regs) { cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); } - #pragma unroll - for (int i = 0; i < size<2>(tCrA); ++i) { - if (i < size<2>(tCrA) - 1) { - if (!A_in_regs) { cute::copy(smem_tiled_copy_A, tCsA(_, _, i + 1), tCrA_copy_view(_, _, i + 1)); } - if (!B_in_regs) { cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), tCrB_copy_view(_, _, i + 1)); } - } - cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); +template +inline __device__ void +gemm(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const &tCsA, + Tensor4 const &tCsB, TiledMma tiled_mma, TiledCopyA smem_tiled_copy_A, + TiledCopyB smem_tiled_copy_B, ThrCopyA smem_thr_copy_A, + ThrCopyB smem_thr_copy_B) { + CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M + CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N + CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K + Tensor tCrA_copy_view = smem_thr_copy_A.retile_D(tCrA); + CUTE_STATIC_ASSERT_V(size<1>(tCsA) == size<1>(tCrA_copy_view)); // M + Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); + CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N + if (!A_in_regs) { + cute::copy(smem_tiled_copy_A, tCsA(_, _, _0{}), tCrA_copy_view(_, _, _0{})); + } + if (!B_in_regs) { + cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); + } +#pragma unroll + for (int i = 0; i < size<2>(tCrA); ++i) { + if (i < size<2>(tCrA) - 1) { + if (!A_in_regs) { + cute::copy(smem_tiled_copy_A, tCsA(_, _, i + 1), + tCrA_copy_view(_, _, i + 1)); + } + if (!B_in_regs) { + cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), + tCrB_copy_view(_, _, i + 1)); + } } + cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void gemm_A_in_regs(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const& tCsB, - TiledMma tiled_mma, TiledCopy smem_tiled_copy_B, - ThrCopy smem_thr_copy_B) { - CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M - CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N - CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K - Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); - CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N - cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); - #pragma unroll - for (int i = 0; i < size<2>(tCrA); ++i) { - if (i < size<2>(tCrA) - 1) { - cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), tCrB_copy_view(_, _, i + 1)); - } - cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); +template +inline __device__ void +gemm_A_in_regs(Tensor0 &acc, Tensor1 &tCrA, Tensor2 &tCrB, Tensor3 const &tCsB, + TiledMma tiled_mma, TiledCopy smem_tiled_copy_B, + ThrCopy 
smem_thr_copy_B) { + CUTE_STATIC_ASSERT_V(size<1>(tCrA) == size<1>(acc)); // MMA_M + CUTE_STATIC_ASSERT_V(size<1>(tCrB) == size<2>(acc)); // MMA_N + CUTE_STATIC_ASSERT_V(size<2>(tCrA) == size<2>(tCrB)); // MMA_K + Tensor tCrB_copy_view = smem_thr_copy_B.retile_D(tCrB); + CUTE_STATIC_ASSERT_V(size<1>(tCsB) == size<1>(tCrB_copy_view)); // N + cute::copy(smem_tiled_copy_B, tCsB(_, _, _0{}), tCrB_copy_view(_, _, _0{})); +#pragma unroll + for (int i = 0; i < size<2>(tCrA); ++i) { + if (i < size<2>(tCrA) - 1) { + cute::copy(smem_tiled_copy_B, tCsB(_, _, i + 1), + tCrB_copy_view(_, _, i + 1)); } + cute::gemm(tiled_mma, tCrA(_, _, i), tCrB(_, _, i), acc); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Convert acc_layout from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, MMA_N)) -template +// Convert acc_layout from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, +// MMA_N)) +template inline __device__ auto convert_layout_acc_rowcol(Layout acc_layout) { - static_assert(decltype(size<0>(acc_layout))::value == 4); - static_assert(decltype(rank(acc_layout))::value == 3); - auto l = logical_divide(acc_layout, Shape<_2>{}); // ((2, 2), MMA_M, MMA_N) - // TD [2023-08-13]: Idk why but get<0, 1>(l) doesn't work for Cutlass 3.2, I'm getting - // "int_tuple.hpp(74): error: conversion to inaccessible base class" - // return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, 0>(l), get<2>(l))); - return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), make_layout(get<0>(get<0>(l)), get<2>(l))); + static_assert(decltype(size<0>(acc_layout))::value == 4); + static_assert(decltype(rank(acc_layout))::value == 3); + auto l = logical_divide(acc_layout, Shape<_2>{}); // ((2, 2), MMA_M, MMA_N) + // TD [2023-08-13]: Idk why but get<0, 1>(l) doesn't work for Cutlass 3.2, I'm + // getting "int_tuple.hpp(74): error: conversion to inaccessible base class" + // return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, + // 0>(l), get<2>(l))); + return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), + make_layout(get<0>(get<0>(l)), get<2>(l))); }; //////////////////////////////////////////////////////////////////////////////////////////////////// -// Convert rowcol_layout from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, MMA_N / 2) -// if using m16n8k16, or to ((2, 2, 1), MMA_M, MMA_N) if using m16n8k8. -template +// Convert rowcol_layout from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), +// MMA_M, MMA_N / 2) if using m16n8k16, or to ((2, 2, 1), MMA_M, MMA_N) if using +// m16n8k8. +template inline __device__ auto convert_layout_rowcol_Aregs(Layout rowcol_layout) { - using X = Underscore; - static_assert(decltype(size<0, 0>(rowcol_layout))::value == 2); - static_assert(decltype(size<1, 0>(rowcol_layout))::value == 2); - constexpr int mma_shape_K = get<2>(typename MMA_traits::Shape_MNK{}); - static_assert(mma_shape_K == 8 || mma_shape_K == 16); - constexpr int MMA_N_divisor = mma_shape_K == 8 ? 
1 : 2; - auto l = logical_divide(rowcol_layout, Shape>>{}); // ((2, MMA_M), (2, (2, MMA_N / 2))) - // TD [2023-08-13]: Same error as above on Cutlass 3.2 - // return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, 0>(l)), - // get<0, 1>(l), - // get<1, 1, 1>(l)); - return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), get<0>(get<1>(get<1>(l)))), - get<1>(get<0>(l)), - get<1>(get<1>(get<1>(l)))); + using X = Underscore; + static_assert(decltype(size<0, 0>(rowcol_layout))::value == 2); + static_assert(decltype(size<1, 0>(rowcol_layout))::value == 2); + constexpr int mma_shape_K = get<2>(typename MMA_traits::Shape_MNK{}); + static_assert(mma_shape_K == 8 || mma_shape_K == 16); + constexpr int MMA_N_divisor = mma_shape_K == 8 ? 1 : 2; + auto l = logical_divide( + rowcol_layout, + Shape>>{}); // ((2, MMA_M), (2, (2, MMA_N / + // 2))) + // TD [2023-08-13]: Same error as above on Cutlass 3.2 + // return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, + // 0>(l)), + // get<0, 1>(l), + // get<1, 1, 1>(l)); + return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), + get<0>(get<1>(get<1>(l)))), + get<1>(get<0>(l)), get<1>(get<1>(get<1>(l)))); }; //////////////////////////////////////////////////////////////////////////////////////////////////// template inline __device__ auto convert_type(Tensor const &tensor) { - using From_type = typename Engine::value_type; - constexpr int numel = decltype(size(tensor))::value; - cutlass::NumericArrayConverter convert_op; - // HACK: this requires tensor to be "contiguous" - auto frag = convert_op(*reinterpret_cast *>(tensor.data())); - return make_tensor(make_rmem_ptr(&frag), tensor.layout()); + using From_type = typename Engine::value_type; + constexpr int numel = decltype(size(tensor))::value; + cutlass::NumericArrayConverter convert_op; + // HACK: this requires tensor to be "contiguous" + auto frag = + convert_op(*reinterpret_cast *>( + tensor.data())); + return make_tensor(make_rmem_ptr(&frag), tensor.layout()); } //////////////////////////////////////////////////////////////////////////////////////////////////// template inline __device__ void relu_(Tensor &tensor) { - constexpr int numel = decltype(size(tensor))::value; - static_assert(numel % 2 == 0); - using value_t = typename Engine::value_type; - // HACK: this requires tensor to be "contiguous" - Tensor tensor_uint32 = recast(tensor); - #pragma unroll - for (int i = 0; i < size(tensor_uint32); ++i) { - tensor_uint32(i) = relu2(tensor_uint32(i)); - } + constexpr int numel = decltype(size(tensor))::value; + static_assert(numel % 2 == 0); + using value_t = typename Engine::value_type; + // HACK: this requires tensor to be "contiguous" + Tensor tensor_uint32 = recast(tensor); +#pragma unroll + for (int i = 0; i < size(tensor_uint32); ++i) { + tensor_uint32(i) = relu2(tensor_uint32(i)); + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -// On SM80 and above, we can fuse fp32 -> fp16/bf16 conversion and relu into 1 instruction +// On SM80 and above, we can fuse fp32 -> fp16/bf16 conversion and relu into 1 +// instruction template inline __device__ auto convert_type_relu(Tensor const &tensor) { - using From_type = typename Engine::value_type; - static_assert(std::is_same_v || std::is_same_v); - static_assert(std::is_same_v); - constexpr int numel = decltype(size(tensor))::value; - static_assert(numel % 2 == 0); + using From_type = typename Engine::value_type; + static_assert(std::is_same_v || + 
std::is_same_v); + static_assert(std::is_same_v); + constexpr int numel = decltype(size(tensor))::value; + static_assert(numel % 2 == 0); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 - // HACK: this requires tensor to be "contiguous" - Tensor tensor_float2 = recast(tensor); - Tensor out_uint32 = make_tensor(tensor_float2.layout()); - #pragma unroll - for (int i = 0; i < size(out_uint32); ++i) { - out_uint32(i) = convert_relu2(tensor_float2(i)); - } - Tensor out = make_tensor(make_rmem_ptr(out_uint32.data()), tensor.layout()); + // HACK: this requires tensor to be "contiguous" + Tensor tensor_float2 = recast(tensor); + Tensor out_uint32 = make_tensor(tensor_float2.layout()); +#pragma unroll + for (int i = 0; i < size(out_uint32); ++i) { + out_uint32(i) = convert_relu2(tensor_float2(i)); + } + Tensor out = + make_tensor(make_rmem_ptr(out_uint32.data()), tensor.layout()); #else - Tensor out = flash::convert_type(tensor); - flash::relu_(out); + Tensor out = flash::convert_type(tensor); + flash::relu_(out); #endif - return out; + return out; } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Blocks until all but N previous cp.async.commit_group operations have committed. -// This differs from cute::cp_async_wait in that when N = 0 we don't call cp.async.wait_all -// (which is equivalent to commit_group then wait_group 0). -// Instead we just call cp.async.wait_group 0, which is slightly faster. +// Blocks until all but N previous cp.async.commit_group operations have +// committed. This differs from cute::cp_async_wait in that when N = 0 we don't +// call cp.async.wait_all (which is equivalent to commit_group then wait_group +// 0). Instead we just call cp.async.wait_group 0, which is slightly faster. 
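// [Editor's aside -- typical call sequence, a sketch assuming the
// double-buffered shared-memory pipeline used by these kernels:]
//   cute::cp_async_fence();       // commit the cp.async ops issued so far
//   flash::cp_async_wait<0>();    // block until every committed group lands
//   __syncthreads();              // then the whole CTA may read the smem tile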
 // https://github.com/NVIDIA/cutlass/blob/master/include/cute/arch/copy_sm80.hpp#L113
-template <int N>
-CUTE_HOST_DEVICE
-void cp_async_wait() {
+template <int N> CUTE_HOST_DEVICE void cp_async_wait() {
 #if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED)
-    asm volatile("cp.async.wait_group %0;\n" :: "n"(N));
+  asm volatile("cp.async.wait_group %0;\n" ::"n"(N));
 #endif
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////

-template <bool Is_even_MN=true, bool Is_even_K=true, bool Clear_OOB_MN=false, bool Clear_OOB_K=true,
-          typename TiledCopy, typename Engine0, typename Layout0, typename Engine1, typename Layout1,
-          typename Engine2, typename Layout2, typename Engine3, typename Layout3>
-inline __device__ void copy(TiledCopy tiled_copy, Tensor<Engine0, Layout0> const &S,
-                            Tensor<Engine1, Layout1> &D, Tensor<Engine2, Layout2> const &identity_MN,
-                            Tensor<Engine3, Layout3> const &predicate_K, const int max_MN=0) {
-    CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
-    CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
-    CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D));                     // MMA
-    CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D));                     // MMA_M
-    CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D));                     // MMA_K
-    // There's no case where !Clear_OOB_K && Clear_OOB_MN
-    static_assert(!(Clear_OOB_MN && !Clear_OOB_K));
-    #pragma unroll
-    for (int m = 0; m < size<1>(S); ++m) {
-        if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) {
-            #pragma unroll
-            for (int k = 0; k < size<2>(S); ++k) {
-                if (Is_even_K || predicate_K(k)) {
-                    cute::copy(tiled_copy, S(_, m, k), D(_, m, k));
-                } else if (Clear_OOB_K) {
-                    cute::clear(D(_, m, k));
-                }
-            }
-        } else if (Clear_OOB_MN) {
-            cute::clear(D(_, m, _));
+template <bool Is_even_MN = true, bool Is_even_K = true,
+          bool Clear_OOB_MN = false, bool Clear_OOB_K = true,
+          typename TiledCopy, typename Engine0, typename Layout0,
+          typename Engine1, typename Layout1, typename Engine2,
+          typename Layout2, typename Engine3, typename Layout3>
+inline __device__ void
+copy(TiledCopy tiled_copy, Tensor<Engine0, Layout0> const &S,
+     Tensor<Engine1, Layout1> &D, Tensor<Engine2, Layout2> const &identity_MN,
+     Tensor<Engine3, Layout3> const &predicate_K, const int max_MN = 0) {
+  CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K
+  // There's no case where !Clear_OOB_K && Clear_OOB_MN
+  static_assert(!(Clear_OOB_MN && !Clear_OOB_K));
+#pragma unroll
+  for (int m = 0; m < size<1>(S); ++m) {
+    if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) {
+#pragma unroll
+      for (int k = 0; k < size<2>(S); ++k) {
+        if (Is_even_K || predicate_K(k)) {
+          cute::copy(tiled_copy, S(_, m, k), D(_, m, k));
+        } else if (Clear_OOB_K) {
+          cute::clear(D(_, m, k));
        }
+      }
+    } else if (Clear_OOB_MN) {
+      cute::clear(D(_, m, _));
     }
-    // TD [2023-04-13]: Strange that the code below can cause race condition.
-    // I think it's because the copies are under an if statement.
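// [Editor's note, hedged] A plausible mechanism for the suspected race:
// cp.async completion is tracked per commit group rather than per address,
// so when copies sit under a divergent `if`, threads commit groups holding
// different numbers of outstanding copies, and the wait_group bookkeeping no
// longer lines up uniformly across the block before the next __syncthreads.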
- // if (Is_even_K) { - // #pragma unroll - // for (int m = 0; m < size<1>(S); ++m) { - // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { - // copy(tiled_copy, S(_, m, _), D(_, m, _)); - // } else if (Clear_OOB_MN) { - // clear(D(_, m, _)); - // } - // } - // } else { // It's slightly faster in this case if iterate over K first - // #pragma unroll - // for (int k = 0; k < size<2>(S); ++k) { - // if (predicate_K(k)) { - // #pragma unroll - // for (int m = 0; m < size<1>(S); ++m) { - // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { - // copy(tiled_copy, S(_, m, k), D(_, m, k)); - // } else if (Clear_OOB_MN) { - // clear(D(_, m, k)); - // } - // } - // } else if (Clear_OOB_K) { // There's no case where !Clear_OOB_K && Clear_OOB_MN - // if (Clear_OOB_MN || Is_even_MN) { - // clear(D(_, _, k)); - // } else { - // #pragma unroll - // for (int m = 0; m < size<1>(S); ++m) { - // if (!(Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN)) { - // clear(D(_, m, k)); - // } - // } - // } - // } - // } - // } + } + // TD [2023-04-13]: Strange that the code below can cause race condition. + // I think it's because the copies are under an if statement. + // if (Is_even_K) { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { + // copy(tiled_copy, S(_, m, _), D(_, m, _)); + // } else if (Clear_OOB_MN) { + // clear(D(_, m, _)); + // } + // } + // } else { // It's slightly faster in this case if iterate over K first + // #pragma unroll + // for (int k = 0; k < size<2>(S); ++k) { + // if (predicate_K(k)) { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (Is_even_MN || get<0>(identity_MN(0, m, 0)) < max_MN) { + // copy(tiled_copy, S(_, m, k), D(_, m, k)); + // } else if (Clear_OOB_MN) { + // clear(D(_, m, k)); + // } + // } + // } else if (Clear_OOB_K) { // There's no case where !Clear_OOB_K && + // Clear_OOB_MN + // if (Clear_OOB_MN || Is_even_MN) { + // clear(D(_, _, k)); + // } else { + // #pragma unroll + // for (int m = 0; m < size<1>(S); ++m) { + // if (!(Is_even_MN || get<0>(identity_MN(0, m, 0)) < + // max_MN)) { + // clear(D(_, m, k)); + // } + // } + // } + // } + // } + // } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void copy_w_min_idx(Tensor const &S, - Tensor &D, Tensor const &identity_MN, - Tensor const &predicate_K, - const int max_MN=0, const int min_MN=0) { - CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); - CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); - CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K - // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, max_MN = %d, min_MN = %d\n", blockIdx.y, max_MN, min_MN); } - #pragma unroll - for (int m = 0; m < size<1>(S); ++m) { - // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } - if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { - // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("Inner loop, blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } - #pragma unroll - for (int k = 0; k < size<2>(S); ++k) { - if (Is_even_K || predicate_K(k)) { - cute::copy(S(_, m, k), D(_, m, k)); - } - } +template +inline __device__ void +copy_w_min_idx(Tensor const &S, Tensor &D, + Tensor 
const &identity_MN, + Tensor const &predicate_K, + const int max_MN = 0, const int min_MN = 0) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K +// if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, max_MN = +// %d, min_MN = %d\n", blockIdx.y, max_MN, min_MN); } +#pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, m = + // %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } + if (get<0>(identity_MN(0, m, 0)) >= min_MN && + get<0>(identity_MN(0, m, 0)) < max_MN) { +// if (threadIdx.x == 0 && blockIdx.z == 0) { printf("Inner loop, blockIdx.y = +// %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); } +#pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || predicate_K(k)) { + cute::copy(S(_, m, k), D(_, m, k)); } + } } + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void copy_rotary_interleaved(Tensor const &S, - Tensor &D, - Tensor const &Cos, - Tensor const &Sin, - Tensor const &identity_MN, - const int max_MN, const int min_MN, - const int dim, const int rotary_dim) { - CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); - CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); - CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K - CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); // MMA_K - static_assert(decltype(size<0>(S))::value == decltype(size<0>(Cos))::value * 2); - static_assert(decltype(size<0>(Cos))::value % 2 == 0); // Since we do fast conversion from fp16/bf16 to fp32 - Tensor rCos = make_fragment_like(Cos); - Tensor rSin = make_fragment_like(Sin); - Tensor rS = make_fragment_like(S); - #pragma unroll - for (int m = 0; m < size<1>(S); ++m) { - if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { - #pragma unroll - for (int k = 0; k < size<2>(S); ++k) { - if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { - cute::copy(S(_, m, k), rS(_, m, k)); - if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { - cute::copy(Cos(_, m, k), rCos(_, m, k)); - cute::copy(Sin(_, m, k), rSin(_, m, k)); - Tensor S_fp32 = convert_type(rS(_, m, k)); - Tensor cos_fp32 = convert_type(rCos(_, m, k)); - Tensor sin_fp32 = convert_type(rSin(_, m, k)); - #pragma unroll - for (int i = 0; i < size<0>(rS) / 2; ++i) { - float real = S_fp32(2 * i) * cos_fp32(i) - S_fp32(2 * i + 1) * sin_fp32(i); - float imag = S_fp32(2 * i) * sin_fp32(i) + S_fp32(2 * i + 1) * cos_fp32(i); - S_fp32(2 * i) = real; - S_fp32(2 * i + 1) = imag; - } - // Idk but I need to copy for the convert_type to work - Tensor S_fp32_copy = make_fragment_like(S_fp32); - cute::copy(S_fp32, S_fp32_copy); - using T = typename Engine0::value_type; - Tensor S_og_type = convert_type(S_fp32_copy); - cute::copy(S_og_type, rS(_, m, k)); - } - cute::copy(rS(_, m, k), D(_, m, k)); - } else if (Clear_OOB_K) { - cute::clear(D(_, m, k)); - } +template +inline __device__ 
void copy_rotary_interleaved( + Tensor const &S, Tensor &D, + Tensor const &Cos, Tensor const &Sin, + Tensor const &identity_MN, const int max_MN, + const int min_MN, const int dim, const int rotary_dim) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K + CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); // MMA_K + static_assert(decltype(size<0>(S))::value == + decltype(size<0>(Cos))::value * 2); + static_assert(decltype(size<0>(Cos))::value % 2 == + 0); // Since we do fast conversion from fp16/bf16 to fp32 + Tensor rCos = make_fragment_like(Cos); + Tensor rSin = make_fragment_like(Sin); + Tensor rS = make_fragment_like(S); +#pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + if (get<0>(identity_MN(0, m, 0)) >= min_MN && + get<0>(identity_MN(0, m, 0)) < max_MN) { +#pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { + cute::copy(S(_, m, k), rS(_, m, k)); + if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { + cute::copy(Cos(_, m, k), rCos(_, m, k)); + cute::copy(Sin(_, m, k), rSin(_, m, k)); + Tensor S_fp32 = convert_type(rS(_, m, k)); + Tensor cos_fp32 = convert_type(rCos(_, m, k)); + Tensor sin_fp32 = convert_type(rSin(_, m, k)); +#pragma unroll + for (int i = 0; i < size<0>(rS) / 2; ++i) { + float real = + S_fp32(2 * i) * cos_fp32(i) - S_fp32(2 * i + 1) * sin_fp32(i); + float imag = + S_fp32(2 * i) * sin_fp32(i) + S_fp32(2 * i + 1) * cos_fp32(i); + S_fp32(2 * i) = real; + S_fp32(2 * i + 1) = imag; } + // Idk but I need to copy for the convert_type to work + Tensor S_fp32_copy = make_fragment_like(S_fp32); + cute::copy(S_fp32, S_fp32_copy); + using T = typename Engine0::value_type; + Tensor S_og_type = convert_type(S_fp32_copy); + cute::copy(S_og_type, rS(_, m, k)); + } + cute::copy(rS(_, m, k), D(_, m, k)); + } else if (Clear_OOB_K) { + cute::clear(D(_, m, k)); } + } } + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline __device__ void copy_rotary_contiguous(Tensor const &S, - Tensor &D, - Tensor const &Cos, - Tensor const &Sin, - Tensor const &identity_MN, - const int max_MN, const int min_MN, - const int dim, const int rotary_dim) { - CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); - CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); - CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K - CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M - CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K - CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(Cos)); // MMA - CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); - static_assert(decltype(size<0>(Cos))::value % 2 == 0); // Since we do fast conversion from fp16/bf16 to fp32 - Tensor rCos = make_fragment_like(Cos); - Tensor rSin = make_fragment_like(Sin); - Tensor rS = make_fragment_like(S); - Tensor rS_other = 
make_fragment_like(rS(_, 0, 0)); - #pragma unroll - for (int m = 0; m < size<1>(S); ++m) { - if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) { - #pragma unroll - for (int k = 0; k < size<2>(S); ++k) { - if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { - cute::copy(S(_, m, k), rS(_, m, k)); - if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { - const bool is_left = get<1>(identity_MN(0, 0, k)) < rotary_dim / 2; - Tensor gS_other = make_tensor(S(_, m, k).data() + (is_left ? rotary_dim / 2 : -rotary_dim / 2), S(_, m, k).layout()); - cute::copy(gS_other, rS_other); - // if (cute::thread0()) { print_tensor(rS(_, m, k)); print_tensor(rS_other); } - Tensor gCos = make_tensor(Cos(_, m, k).data() + (is_left ? 0 : -rotary_dim / 2), Cos(_, m, k).layout()); - Tensor gSin = make_tensor(Sin(_, m, k).data() + (is_left ? 0 : -rotary_dim / 2), Sin(_, m, k).layout()); - cute::copy(gCos, rCos(_, m, k)); - cute::copy(gSin, rSin(_, m, k)); - // if (cute::thread0()) { print_tensor(rCos(_, m, k)); print_tensor(rSin(_, m, k)); } - Tensor S_fp32 = convert_type(rS(_, m, k)); - Tensor S_other_fp32 = convert_type(rS_other); - Tensor cos_fp32 = convert_type(rCos(_, m, k)); - Tensor sin_fp32 = convert_type(rSin(_, m, k)); - #pragma unroll - for (int i = 0; i < size<0>(rS); ++i) { - S_fp32(i) = S_fp32(i) * cos_fp32(i) + S_other_fp32(i) * (is_left ? -sin_fp32(i) : sin_fp32(i)); - } - // Idk but I need to copy for the convert_type to work - Tensor S_fp32_copy = make_fragment_like(S_fp32); - cute::copy(S_fp32, S_fp32_copy); - using T = typename Engine0::value_type; - Tensor S_og_type = convert_type(S_fp32_copy); - cute::copy(S_og_type, rS(_, m, k)); - // if (cute::thread0()) { print_tensor(rS(_, m, k)); } - } - cute::copy(rS(_, m, k), D(_, m, k)); - } else if (Clear_OOB_K) { - cute::clear(D(_, m, k)); - } +template +inline __device__ void copy_rotary_contiguous( + Tensor const &S, Tensor &D, + Tensor const &Cos, Tensor const &Sin, + Tensor const &identity_MN, const int max_MN, + const int min_MN, const int dim, const int rotary_dim) { + CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{}); + CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{}); + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D)); // MMA + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos)); // MMA_K + CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin)); // MMA_M + CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin)); // MMA_K + CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(Cos)); // MMA + CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin)); + static_assert(decltype(size<0>(Cos))::value % 2 == + 0); // Since we do fast conversion from fp16/bf16 to fp32 + Tensor rCos = make_fragment_like(Cos); + Tensor rSin = make_fragment_like(Sin); + Tensor rS = make_fragment_like(S); + Tensor rS_other = make_fragment_like(rS(_, 0, 0)); +#pragma unroll + for (int m = 0; m < size<1>(S); ++m) { + if (get<0>(identity_MN(0, m, 0)) >= min_MN && + get<0>(identity_MN(0, m, 0)) < max_MN) { +#pragma unroll + for (int k = 0; k < size<2>(S); ++k) { + if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) { + cute::copy(S(_, m, k), rS(_, m, k)); + if (get<1>(identity_MN(0, 0, k)) < rotary_dim) { + const bool is_left = get<1>(identity_MN(0, 0, k)) < rotary_dim / 2; + Tensor gS_other = + make_tensor(S(_, m, k).data() + + (is_left ? 
rotary_dim / 2 : -rotary_dim / 2), + S(_, m, k).layout()); + cute::copy(gS_other, rS_other); + // if (cute::thread0()) { print_tensor(rS(_, m, k)); + // print_tensor(rS_other); } + Tensor gCos = make_tensor(Cos(_, m, k).data() + + (is_left ? 0 : -rotary_dim / 2), + Cos(_, m, k).layout()); + Tensor gSin = make_tensor(Sin(_, m, k).data() + + (is_left ? 0 : -rotary_dim / 2), + Sin(_, m, k).layout()); + cute::copy(gCos, rCos(_, m, k)); + cute::copy(gSin, rSin(_, m, k)); + // if (cute::thread0()) { print_tensor(rCos(_, m, k)); + // print_tensor(rSin(_, m, k)); } + Tensor S_fp32 = convert_type(rS(_, m, k)); + Tensor S_other_fp32 = convert_type(rS_other); + Tensor cos_fp32 = convert_type(rCos(_, m, k)); + Tensor sin_fp32 = convert_type(rSin(_, m, k)); +#pragma unroll + for (int i = 0; i < size<0>(rS); ++i) { + S_fp32(i) = + S_fp32(i) * cos_fp32(i) + + S_other_fp32(i) * (is_left ? -sin_fp32(i) : sin_fp32(i)); } + // Idk but I need to copy for the convert_type to work + Tensor S_fp32_copy = make_fragment_like(S_fp32); + cute::copy(S_fp32, S_fp32_copy); + using T = typename Engine0::value_type; + Tensor S_og_type = convert_type(S_fp32_copy); + cute::copy(S_og_type, rS(_, m, k)); + // if (cute::thread0()) { print_tensor(rS(_, m, k)); } + } + cute::copy(rS(_, m, k), D(_, m, k)); + } else if (Clear_OOB_K) { + cute::clear(D(_, m, k)); } + } } + } } //////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace flash +} // namespace flash diff --git a/runtime/test/test_files/external_libs/libflash_attn.so b/runtime/test/test_files/external_libs/libflash_attn.so index 3f53bd7d4..8f0cfff40 100755 --- a/runtime/test/test_files/external_libs/libflash_attn.so +++ b/runtime/test/test_files/external_libs/libflash_attn.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f00cff217327552d99a2341b768389bfffbb38934e0b193d1eb77b6b0e84efcd -size 339166176 +oid sha256:b8e8e3639fa7bff088ba97ffc626cdb7fe2038b0ba36344e6a28b35a1a1ee94d +size 312538456 diff --git a/runtime/test/test_files/flash_attn_bwd.mlir b/runtime/test/test_files/flash_attn_bwd.mlir index 484a54af4..e9e3dea11 100644 --- a/runtime/test/test_files/flash_attn_bwd.mlir +++ b/runtime/test/test_files/flash_attn_bwd.mlir @@ -12,7 +12,7 @@ module attributes {byre.container_module} { %arg10 : memref<1x3x128xf32, "cuda"> {byre.argname = "d_SoftmaxLse", byre.argtype = 2: i32}, %arg11 : memref<1x3x128x32xf32, "cuda"> {byre.argname = "d_Q_accum", byre.argtype = 2: i32}, %arg12 : memref<1x3x128x128xf32, "cuda"> {byre.argname = "SoftmaxPtr", byre.argtype = 2: i32}) attributes {byre.entry_point} { - "byre.custom"(%arg1, %arg2, %arg3, %arg4, %arg5, %arg12, %arg9) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">, memref<2xi64, "cuda">) -> () + "byre.custom"(%arg1, %arg2, %arg3, %arg9, %arg4, %arg5, %arg12) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 
12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">) -> () "byre.custom"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg9, %arg6, %arg7, %arg8, %arg10, %arg11) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_bwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x32xf32, "cuda">) -> () return } diff --git a/runtime/test/test_files/flash_attn_fwd.mlir b/runtime/test/test_files/flash_attn_fwd.mlir index e505fa621..bf454feb0 100644 --- a/runtime/test/test_files/flash_attn_fwd.mlir +++ b/runtime/test/test_files/flash_attn_fwd.mlir @@ -6,7 +6,7 @@ module attributes {byre.container_module} { %arg4 : memref<1x3x128xf32, "cuda"> {byre.argname = "SoftmaxLse", byre.argtype = 2: i32}, %arg5 : memref<1x3x128x128xf32, "cuda"> {byre.argname = "SoftmaxPtr", byre.argtype = 2: i32}, %arg6 : memref<2xi64, "cuda"> {byre.argname = "RngState", byre.argtype = 2: i32}) attributes {byre.entry_point} { - "byre.custom"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">, memref<2xi64, "cuda">) -> () + "byre.custom"(%arg0, %arg1, %arg2, %arg6, %arg3, %arg4, %arg5) {callee = "custom", lib_path = "test/test_files/external_libs/libflash_attn.so", api_name = "run_flash_attn_fwd", extra_args = [12288 : i64, 12288 : i64, 12288 : i64, 12288 : i64, 96 : i64, 96 : i64, 96 : i64, 96 : i64, 32 : i64, 32 : i64, 32 : i64, 32 : i64, 1 : i64, 3 : i64, 3 : i64, 32 : i64, 32 : i64, 0.5 : f32, 128 : i64, 128 : i64, 128 : i64, 128 : i64, 0.0 : f32, -1 : i64, 0 : i64]} : (memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<2xi64, "cuda">, memref<1x128x3x32xf16, "cuda">, memref<1x3x128xf32, "cuda">, memref<1x3x128x128xf32, "cuda">) -> () return } } diff --git a/scripts/prepare.sh b/scripts/prepare.sh index a5cbff5ce..43a612e05 100755 --- a/scripts/prepare.sh +++ b/scripts/prepare.sh @@ -29,6 +29,11 @@ function load_llvm_prebuilt() { 
 LLVM_INSTALL_DIR="/data00/llvm_libraries/4592543a01609feb4b3c19e81a9d54743e15e329/llvm_build"
 }
+function lfs_pull_external_libs() {
+  git lfs pull --include runtime/test/test_files/external_libs/libflash_attn.so
+  git lfs pull --include external_libs/libs/libflash_attn.so
+}
+
 function prepare_for_compiler() {
   git submodule update --init --recursive -f external/mlir-hlo external/AITemplate
   apply_aitemplate_patches
@@ -39,4 +44,5 @@ function prepare_for_compiler() {
 function prepare_for_runtime() {
   git submodule update --init --recursive -f external/mlir-hlo external/cutlass external/date external/googletest external/pybind11
   load_llvm_prebuilt
+  lfs_pull_external_libs
 }
diff --git a/tests/numerical_test/main.py b/tests/numerical_test/main.py
index ed46a05cd..9f7db641e 100644
--- a/tests/numerical_test/main.py
+++ b/tests/numerical_test/main.py
@@ -116,16 +116,14 @@ def main():
     if args.config == 'all':
         results = run_mlir_test(arch)
         results = results + run_torch_test(arch)
-        # TODO(zzk): disable flash attn test for now
-        # run_torch_dynamo_tests(arch)
+        run_torch_dynamo_tests(arch)
     elif args.config == 'mlir':
         results = run_mlir_test(arch)
     elif args.config == 'torch':
         results = run_torch_test(arch)
     elif args.config == 'dynamo':
         # TODO(zzk): use test infra for dynamo tests
-        # TODO(zzk): disable flash attn test for now
-        # run_torch_dynamo_tests(arch)
+        run_torch_dynamo_tests(arch)
         pass
     failed = report_results(results)
     sys.exit(1 if failed else 0)

From ff3056e5932fe5deb99076116e25f25c7a4d358e Mon Sep 17 00:00:00 2001
From: Zhekun Zhang
Date: Tue, 23 Jan 2024 22:01:36 +0000
Subject: [PATCH 4/9] format

---
 runtime/lib/core/framework/op_accessor.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runtime/lib/core/framework/op_accessor.cc b/runtime/lib/core/framework/op_accessor.cc
index ec4e46213..8801942b4 100644
--- a/runtime/lib/core/framework/op_accessor.cc
+++ b/runtime/lib/core/framework/op_accessor.cc
@@ -215,11 +215,11 @@ void *OpAccessor::GetAttrAsVoidPtr(const std::string &name) const {
     for (Attribute elementAttr : attr) {
       if (auto floatAttr = dyn_cast<FloatAttr>(elementAttr)) {
         float val = floatAttr.getValueAsDouble();
-        std::memcpy(static_cast<char *>(result) + ptr, &val, sizeof(float));
+        std::memcpy(static_cast<char *>(result) + ptr, &val, sizeof(float));
         ptr += sizeof(float);
       } else if (auto intAttr = dyn_cast<IntegerAttr>(elementAttr)) {
         int64_t val = intAttr.getInt();
-        std::memcpy(static_cast<char *>(result) + ptr, &val, sizeof(int64_t));
+        std::memcpy(static_cast<char *>(result) + ptr, &val, sizeof(int64_t));
         ptr += sizeof(int64_t);
       }
     }

From a952db98232a17c7897ff62800067100e6551e7d Mon Sep 17 00:00:00 2001
From: Zhekun Zhang
Date: Tue, 23 Jan 2024 22:11:31 +0000
Subject: [PATCH 5/9] remove external_libs dep & replace it with byteir's

---
 external/cutlass                           |    2 +-
 external_libs/external/cutlass             |    1 -
 external_libs/external/half/LICENSE.txt    |   21 -
 external_libs/external/half/README.txt     |  317 --
 .../external/half/include/half/half.hpp    | 4601 -----------------
 external_libs/runtime/CMakeLists.txt       |    3 +-
 6 files changed, 3 insertions(+), 4942 deletions(-)
 delete mode 160000 external_libs/external/cutlass
 delete mode 100644 external_libs/external/half/LICENSE.txt
 delete mode 100644 external_libs/external/half/README.txt
 delete mode 100644 external_libs/external/half/include/half/half.hpp

diff --git a/external/cutlass b/external/cutlass
index c4f6b8c6b..a75b4ac48 160000
--- a/external/cutlass
+++ b/external/cutlass
@@ -1 +1 @@
-Subproject commit c4f6b8c6bc94ff69048492fb34df0dfaf1983933
+Subproject commit a75b4ac483166189a45290783cb0a18af5ff0ea5

diff --git a/external_libs/external/cutlass b/external_libs/external/cutlass
deleted file mode 160000
index a75b4ac48..000000000
--- a/external_libs/external/cutlass
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a75b4ac483166189a45290783cb0a18af5ff0ea5

diff --git a/external_libs/external/half/LICENSE.txt b/external_libs/external/half/LICENSE.txt
deleted file mode 100644
index 45f55db55..000000000
--- a/external_libs/external/half/LICENSE.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License
-
-Copyright (c) 2012-2021 Christian Rau
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.

diff --git a/external_libs/external/half/README.txt b/external_libs/external/half/README.txt
deleted file mode 100644
index 3dd0d1c2d..000000000
--- a/external_libs/external/half/README.txt
+++ /dev/null
@@ -1,317 +0,0 @@
-HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.2.0)
------------------------------------------------------
-
-This is a C++ header-only library to provide an IEEE 754 conformant 16-bit
-half-precision floating-point type along with corresponding arithmetic
-operators, type conversions and common mathematical functions. It aims for both
-efficiency and ease of use, trying to accurately mimic the behaviour of the
-built-in floating-point types at the best performance possible.
-
-
-INSTALLATION AND REQUIREMENTS
------------------------------
-
-Conveniently, the library consists of just a single header file containing all
-the functionality, which can be directly included by your projects, without the
-neccessity to build anything or link to anything.
-
-Whereas this library is fully C++98-compatible, it can profit from certain
-C++11 features. Support for those features is checked automatically at compile
-(or rather preprocessing) time, but can be explicitly enabled or disabled by
-predefining the corresponding preprocessor symbols to either 1 or 0 yourself
-before including half.hpp. This is useful when the automatic detection fails
-(for more exotic implementations) or when a feature should be explicitly
-disabled:
-
- - 'long long' integer type for mathematical functions returning 'long long'
-   results (enabled for VC++ 2003 and icc 11.1 and newer, gcc and clang,
-   overridable with 'HALF_ENABLE_CPP11_LONG_LONG').
-
- - Static assertions for extended compile-time checks (enabled for VC++ 2010,
-   gcc 4.3, clang 2.9, icc 11.1 and newer, overridable with
-   'HALF_ENABLE_CPP11_STATIC_ASSERT').
-
- - Generalized constant expressions (enabled for VC++ 2015, gcc 4.6, clang 3.1,
-   icc 14.0 and newer, overridable with 'HALF_ENABLE_CPP11_CONSTEXPR').
-
- - noexcept exception specifications (enabled for VC++ 2015, gcc 4.6,
-   clang 3.0, icc 14.0 and newer, overridable with 'HALF_ENABLE_CPP11_NOEXCEPT').
-
- - User-defined literals for half-precision literals to work (enabled for
-   VC++ 2015, gcc 4.7, clang 3.1, icc 15.0 and newer, overridable with
-   'HALF_ENABLE_CPP11_USER_LITERALS').
-
- - Thread-local storage for per-thread floating-point exception flags (enabled
-   for VC++ 2015, gcc 4.8, clang 3.3, icc 15.0 and newer, overridable with
-   'HALF_ENABLE_CPP11_THREAD_LOCAL').
-
- - Type traits and template meta-programming features from <type_traits>
-   (enabled for VC++ 2010, libstdc++ 4.3, libc++ and newer, overridable with
-   'HALF_ENABLE_CPP11_TYPE_TRAITS').
-
- - Special integer types from <cstdint> (enabled for VC++ 2010, libstdc++ 4.3,
-   libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CSTDINT').
-
- - Certain C++11 single-precision mathematical functions from <cmath> for
-   floating-point classification during conversions from higher precision types
-   (enabled for VC++ 2013, libstdc++ 4.3, libc++ and newer, overridable with
-   'HALF_ENABLE_CPP11_CMATH').
-
- - Floating-point environment control from <cfenv> for possible exception
-   propagation to the built-in floating-point platform (enabled for VC++ 2013,
-   libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CFENV').
-
- - Hash functor 'std::hash' from <functional> (enabled for VC++ 2010,
-   libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_HASH').
-
-The library has been tested successfully with Visual C++ 2005-2015, gcc 4-8
-and clang 3-8 on 32- and 64-bit x86 systems. Please contact me if you have any
-problems, suggestions or even just success testing it on other platforms.
-
-
-DOCUMENTATION
--------------
-
-What follows are some general words about the usage of the library and its
-implementation. For a complete documentation of its interface consult the
-corresponding website http://half.sourceforge.net. You may also generate the
-complete developer documentation from the library's only include file's doxygen
-comments, but this is more relevant to developers rather than mere users.
-
-BASIC USAGE
-
-To make use of the library just include its only header file half.hpp, which
-defines all half-precision functionality inside the 'half_float' namespace. The
-actual 16-bit half-precision data type is represented by the 'half' type, which
-uses the standard IEEE representation with 1 sign bit, 5 exponent bits and 11
-mantissa bits (including the hidden bit) and supports all types of special
-values, like subnormal values, infinity and NaNs. This type behaves like the
-built-in floating-point types as much as possible, supporting the usual
-arithmetic, comparison and streaming operators, which makes its use pretty
-straight-forward:
-
-    using half_float::half;
-    half a(3.4), b(5);
-    half c = a * b;
-    c += 3;
-    if(c > a)
-        std::cout << c << std::endl;
-
-Additionally the 'half_float' namespace also defines half-precision versions
-for all mathematical functions of the C++ standard library, which can be used
-directly through ADL:
-
-    half a(-3.14159);
-    half s = sin(abs(a));
-    long l = lround(s);
-
-You may also specify explicit half-precision literals, since the library
-provides a user-defined literal inside the 'half_float::literal' namespace,
-which you just need to import (assuming support for C++11 user-defined literals):
-
-    using namespace half_float::literal;
-    half x = 1.0_h;
-
-Furthermore the library provides proper specializations for
-'std::numeric_limits', defining various implementation properties, and
-'std::hash' for hashing half-precision numbers (assuming support for C++11
-'std::hash'). Similar to the corresponding preprocessor symbols from <cmath>
-the library also defines the 'HUGE_VALH' constant and maybe the 'FP_FAST_FMAH'
-symbol.
-
-CONVERSIONS AND ROUNDING
-
-The half is explicitly constructible/convertible from a single-precision float
-argument. Thus it is also explicitly constructible/convertible from any type
-implicitly convertible to float, but constructing it from types like double or
-int will involve the usual warnings arising when implicitly converting those to
-float because of the lost precision. On the one hand those warnings are
-intentional, because converting those types to half neccessarily also reduces
-precision. But on the other hand they are raised for explicit conversions from
-those types, when the user knows what he is doing. So if those warnings keep
-bugging you, then you won't get around first explicitly converting to float
-before converting to half, or use the 'half_cast' described below. In addition
-you can also directly assign float values to halfs.
-
-In contrast to the float-to-half conversion, which reduces precision, the
-conversion from half to float (and thus to any other type implicitly
-convertible from float) is implicit, because all values represetable with
-half-precision are also representable with single-precision. This way the
-half-to-float conversion behaves similar to the builtin float-to-double
-conversion and all arithmetic expressions involving both half-precision and
-single-precision arguments will be of single-precision type. This way you can
-also directly use the mathematical functions of the C++ standard library,
-though in this case you will invoke the single-precision versions which will
-also return single-precision values, which is (even if maybe performing the
-exact same computation, see below) not as conceptually clean when working in a
-half-precision environment.
-
-The default rounding mode for conversions between half and more precise types
-as well as for rounding results of arithmetic operations and mathematical
-functions rounds to the nearest representable value. But by predefining the
-'HALF_ROUND_STYLE' preprocessor symbol this default can be overridden with one
-of the other standard rounding modes using their respective constants or the
-equivalent values of 'std::float_round_style' (it can even be synchronized with
-the built-in single-precision implementation by defining it to
-'std::numeric_limits<float>::round_style'):
-
- - 'std::round_indeterminate' (-1) for the fastest rounding.
-
- - 'std::round_toward_zero' (0) for rounding toward zero.
-
- - 'std::round_to_nearest' (1) for rounding to the nearest value (default).
-
- - 'std::round_toward_infinity' (2) for rounding toward positive infinity.
-
- - 'std::round_toward_neg_infinity' (3) for rounding toward negative infinity.
-
-In addition to changing the overall default rounding mode one can also use the
-'half_cast'. This converts between half and any built-in arithmetic type using
-a configurable rounding mode (or the default rounding mode if none is
-specified). In addition to a configurable rounding mode, 'half_cast' has
-another big difference to a mere 'static_cast': Any conversions are performed
-directly using the given rounding mode, without any intermediate conversion
-to/from 'float'. This is especially relevant for conversions to integer types,
-which don't necessarily truncate anymore. But also for conversions from
-'double' or 'long double' this may produce more precise results than a
-pre-conversion to 'float' using the single-precision implementation's current
-rounding mode would.
-
-    half a = half_cast<half>(4.2);
-    half b = half_cast<half,std::numeric_limits<float>::round_style>(4.2f);
-    assert( half_cast<int,std::round_to_nearest>( 0.7_h ) == 1 );
-    assert( half_cast<half,std::round_toward_zero>( 4097 ) == 4096.0_h );
-    assert( half_cast<half,std::round_toward_infinity>( 4097 ) == 4100.0_h );
-    assert( half_cast<float,std::round_toward_infinity>( std::numeric_limits<double>::min() ) > 0.0_h );
-
-ACCURACY AND PERFORMANCE
-
-From version 2.0 onward the library is implemented without employing the
-underlying floating-point implementation of the system (except for conversions,
-of course), providing an entirely self-contained half-precision implementation
-with results independent from the system's existing single- or double-precision
-implementation and its rounding behaviour.
-
-As to accuracy, many of the operators and functions provided by this library
-are exact to rounding for all rounding modes, i.e. the error to the exact
-result is at most 0.5 ULP (unit in the last place) for rounding to nearest and
-less than 1 ULP for all other rounding modes. This holds for all the operations
-required by the IEEE 754 standard and many more. Specifically the following
-functions might exhibit a deviation from the correctly rounded exact result by
-1 ULP for a select few input values: 'expm1', 'log1p', 'pow', 'atan2', 'erf',
-'erfc', 'lgamma', 'tgamma' (for more details see the documentation of the
-individual functions). All other functions and operators are always exact to
-rounding or independent of the rounding mode altogether.
-
-The increased IEEE-conformance and cleanliness of this implementation comes
-with a certain performance cost compared to doing computations and mathematical
-functions in hardware-accelerated single-precision. On average and depending on
-the platform, the arithemtic operators are about 75% as fast and the
-mathematical functions about 33-50% as fast as performing the corresponding
-operations in single-precision and converting between the inputs and outputs.
-
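(Aside: the rounding behaviour described above is easy to check in isolation. The following is a minimal sketch, not part of the patch, assuming the library's single header is reachable as "half.hpp" and that C++11 user-defined literals are available.)

    // Sketch only: the library-wide default mode is fixed at compile time,
    // while half_cast can still override it per conversion.
    #define HALF_ROUND_STYLE 0            // 0 = std::round_toward_zero
    #include <cassert>
    #include <limits>
    #include "half.hpp"                   // header path assumed for illustration

    int main()
    {
        using half_float::half;
        using namespace half_float::literal;
        half a(4097.0f);                  // default mode now truncates: 4096
        assert(a == 4096.0_h);
        // An explicit per-cast mode takes precedence over the global default:
        assert(half_float::half_cast<half, std::round_toward_infinity>(4097) == 4100.0_h);
        return 0;
    }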
-However, directly computing with half-precision values is a rather rare
-use-case and usually using actual 'float' values for all computations and
-temproraries and using 'half's only for storage is the recommended way. But
-nevertheless the goal of this library was to provide a complete and
-conceptually clean IEEE-confromant half-precision implementation and in the few
-cases when you do need to compute directly in half-precision you do so for a
-reason and want accurate results.
-
-If necessary, this internal implementation can be overridden by predefining the
-'HALF_ARITHMETIC_TYPE' preprocessor symbol to one of the built-in
-floating-point types ('float', 'double' or 'long double'), which will cause the
-library to use this type for computing arithmetic operations and mathematical
-functions (if available). However, due to using the platform's floating-point
-implementation (and its rounding behaviour) internally, this might cause
-results to deviate from the specified half-precision rounding mode. It will of
-course also inhibit the automatic exception detection described below.
-
-The conversion operations between half-precision and single-precision types can
-also make use of the F16C extension for x86 processors by using the
-corresponding compiler intrinsics from <immintrin.h>. Support for this is
-checked at compile-time by looking for the '__F16C__' macro which at least gcc
-and clang define based on the target platform. It can also be enabled manually
-by predefining the 'HALF_ENABLE_F16C_INTRINSICS' preprocessor symbol to 1, or 0
-for explicitly disabling it. However, this will directly use the corresponding
-intrinsics for conversion without checking if they are available at runtime
-(possibly crashing if they are not), so make sure they are supported on the
-target platform before enabling this.
-
-EXCEPTION HANDLING
-
-The half-precision implementation supports all 5 required floating-point
-exceptions from the IEEE standard to indicate erroneous inputs or inexact
-results during operations. These are represented by exception flags which
-actually use the same values as the corresponding 'FE_...' flags defined in
-C++11's <cfenv> header if supported, specifically:
-
- - 'FE_INVALID' for invalid inputs to an operation.
- - 'FE_DIVBYZERO' for finite inputs producing infinite results.
- - 'FE_OVERFLOW' if a result is too large to represent finitely.
- - 'FE_UNDERFLOW' for a subnormal or zero result after rounding.
- - 'FE_INEXACT' if a result needed rounding to be representable.
- - 'FE_ALL_EXCEPT' as a convenient OR of all possible exception flags.
-
-The internal exception flag state will start with all flags cleared and is
-maintained per thread if C++11 thread-local storage is supported, otherwise it
-will be maintained globally and will theoretically NOT be thread-safe (while
-practically being as thread-safe as a simple integer variable can be). These
-flags can be managed explicitly using the library's error handling functions,
-which again try to mimic the built-in functions for handling floating-point
-exceptions from <cfenv>. You can clear them with 'feclearexcept' (which is the
-only way a flag can be cleared), test them with 'fetestexcept', explicitly
-raise errors with 'feraiseexcept' and save and restore their state using
-'fegetexceptflag' and 'fesetexceptflag'. You can also throw corresponding C++
-exceptions based on the current flag state using 'fethrowexcept'.
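(Again as an aside, not part of the patch: a minimal sketch of the flag API just described, assuming the same "half.hpp" header and that the flags are switched on before it is first included.)

    // Sketch only: enable the internal exception flags, then test for overflow.
    #define HALF_ERRHANDLING_FLAGS 1
    #include "half.hpp"                   // header path assumed for illustration

    using half_float::half;

    bool product_overflows(half a, half b)
    {
        half_float::feclearexcept(FE_ALL_EXCEPT);  // flags are never cleared implicitly
        half c = a * b;                            // raises FE_OVERFLOW if out of half range
        (void)c;
        return half_float::fetestexcept(FE_OVERFLOW) != 0;
    }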
-
-However, any automatic exception detection and handling during half-precision
-operations and functions is DISABLED by default, since it comes with a minor
-performance overhead due to runtime checks, and reacting to IEEE floating-point
-exceptions is rarely ever needed in application code. But the library fully
-supports IEEE-conformant detection of floating-point exceptions and various
-ways for handling them, which can be enabled by pre-defining the corresponding
-preprocessor symbols to 1. They can be enabled individually or all at once and
-they will be processed in the order they are listed here:
-
- - 'HALF_ERRHANDLING_FLAGS' sets the internal exception flags described above
-   whenever the corresponding exception occurs.
- - 'HALF_ERRHANDLING_ERRNO' sets the value of 'errno' from <cerrno> similar to
-   the behaviour of the built-in floating-point types when 'MATH_ERRNO' is used.
- - 'HALF_ERRHANDLING_FENV' will propagate exceptions to the built-in
-   floating-point implementation using 'std::feraiseexcept' if support for
-   C++11 floating-point control is enabled. However, this does not synchronize
-   exceptions: neither will clearing propagate nor will it work in reverse.
- - 'HALF_ERRHANDLING_THROW_...' can be defined to a string literal which will
-   be used as description message for a C++ exception that is thrown whenever
-   a 'FE_...' exception occurs, similar to the behaviour of 'fethrowexcept'.
-
-If any of the above error handling is activated, non-quiet operations on
-half-precision values will also raise a 'FE_INVALID' exception whenever
-they encounter a signaling NaN value, in addition to transforming the value
-into a quiet NaN. If error handling is disabled, signaling NaNs will be
-treated like quiet NaNs (while still getting explicitly quieted if propagated
-to the result). There can also be additional treatment of overflow and
-underflow errors after they have been processed as above, which is ENABLED by
-default (but of course only takes effect if any other exception handling is
-activated) unless overridden by pre-defining the corresponding preprocessor
-symbol to 0:
-
- - 'HALF_ERRHANDLING_OVERFLOW_TO_INEXACT' will cause overflow errors to also
-   raise a 'FE_INEXACT' exception.
- - 'HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT' will cause underflow errors to also
-   raise a 'FE_INEXACT' exception. This will also slightly change the
-   behaviour of the underflow exception, which will ONLY be raised if the
-   result is actually inexact due to underflow. If this is disabled, underflow
-   exceptions will be raised for ANY (possibly exact) subnormal result.
-
-
-CREDITS AND CONTACT
--------------------
-
-This library is developed by CHRISTIAN RAU and released under the MIT License
-(see LICENSE.txt). If you have any questions or problems with it, feel free to
-contact me at rauy@users.sourceforge.net.
-
-Additional credit goes to JEROEN VAN DER ZIJP for his paper on "Fast Half Float
-Conversions", whose algorithms have been used in the library for converting
-between half-precision and single-precision values.

diff --git a/external_libs/external/half/include/half/half.hpp b/external_libs/external/half/include/half/half.hpp
deleted file mode 100644
index f4d861463..000000000
--- a/external_libs/external/half/include/half/half.hpp
+++ /dev/null
@@ -1,4601 +0,0 @@
-// half - IEEE 754-based half-precision floating-point library.
-// -// Copyright (c) 2012-2021 Christian Rau -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -// Version 2.2.0 - -/// \file -/// Main header file for half-precision functionality. - -#ifndef HALF_HALF_HPP -#define HALF_HALF_HPP - -#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) - -#if defined(__INTEL_COMPILER) - #define HALF_ICC_VERSION __INTEL_COMPILER -#elif defined(__ICC) - #define HALF_ICC_VERSION __ICC -#elif defined(__ICL) - #define HALF_ICC_VERSION __ICL -#else - #define HALF_ICC_VERSION 0 -#endif - -// check C++11 language features -#if defined(__clang__) // clang - #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif -#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__) // Intel C++ - #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif -#elif defined(__GNUC__) // gcc - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L - #if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define 
HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #endif - #define HALF_TWOS_COMPLEMENT_INT 1 -#elif defined(_MSC_VER) // Visual C++ - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #define HALF_TWOS_COMPLEMENT_INT 1 - #define HALF_POP_WARNINGS 1 - #pragma warning(push) - #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned -#endif - -// check C++11 library features -#include -#if defined(_LIBCPP_VERSION) // libc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #ifndef HALF_ENABLE_CPP11_CFENV - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #endif -#elif defined(__GLIBCXX__) // libstdc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifdef __clang__ - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #else - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #endif - #endif -#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ - #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if _CPPLIB_VER >= 520 && 
!defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif -#endif -#undef HALF_GCC_VERSION -#undef HALF_ICC_VERSION - -// any error throwing C++ exceptions? -#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT) -#define HALF_ERRHANDLING_THROWS 1 -#endif - -// any error handling enabled? -#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) - -#if HALF_ERRHANDLING - #define HALF_UNUSED_NOERR(name) name -#else - #define HALF_UNUSED_NOERR(name) -#endif - -// support constexpr -#if HALF_ENABLE_CPP11_CONSTEXPR - #define HALF_CONSTEXPR constexpr - #define HALF_CONSTEXPR_CONST constexpr - #if HALF_ERRHANDLING - #define HALF_CONSTEXPR_NOERR - #else - #define HALF_CONSTEXPR_NOERR constexpr - #endif -#else - #define HALF_CONSTEXPR - #define HALF_CONSTEXPR_CONST const - #define HALF_CONSTEXPR_NOERR -#endif - -// support noexcept -#if HALF_ENABLE_CPP11_NOEXCEPT - #define HALF_NOEXCEPT noexcept - #define HALF_NOTHROW noexcept -#else - #define HALF_NOEXCEPT - #define HALF_NOTHROW throw() -#endif - -// support thread storage -#if HALF_ENABLE_CPP11_THREAD_LOCAL - #define HALF_THREAD_LOCAL thread_local -#else - #define HALF_THREAD_LOCAL static -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if HALF_ENABLE_CPP11_TYPE_TRAITS - #include -#endif -#if HALF_ENABLE_CPP11_CSTDINT - #include -#endif -#if HALF_ERRHANDLING_ERRNO - #include -#endif -#if HALF_ENABLE_CPP11_CFENV - #include -#endif -#if HALF_ENABLE_CPP11_HASH - #include -#endif - - -#ifndef HALF_ENABLE_F16C_INTRINSICS - /// Enable F16C intruction set intrinsics. - /// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between - /// half-precision and single-precision values which may result in improved performance. This will not perform additional checks - /// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature. - /// - /// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms. - #define HALF_ENABLE_F16C_INTRINSICS __F16C__ -#endif -#if HALF_ENABLE_F16C_INTRINSICS - #include -#endif - -#ifdef HALF_DOXYGEN_ONLY -/// Type for internal floating-point computations. -/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal -/// half-precision implementation to use this type for computing arithmetic operations and mathematical function (if available). -/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to -/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions. -#define HALF_ARITHMETIC_TYPE (undefined) - -/// Enable internal exception flags. 
-/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to -/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept(). -#define HALF_ERRHANDLING_FLAGS 0 - -/// Enable exception propagation to `errno`. -/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to -/// [errno](https://en.cppreference.com/w/cpp/error/errno) from ``. Specifically this will propagate domain errors as -/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as -/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated. -#define HALF_ERRHANDLING_ERRNO 0 - -/// Enable exception propagation to built-in floating-point platform. -/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in -/// single- and double-precision implementation's exception flags using the -/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from ``. However, this -/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision -/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags. -#define HALF_ERRHANDLING_FENV 0 - -/// Throw C++ exception on domain errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors. -#define HALF_ERRHANDLING_THROW_INVALID (undefined) - -/// Throw C++ exception on pole errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors. -#define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined) - -/// Throw C++ exception on overflow errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows. -#define HALF_ERRHANDLING_THROW_OVERFLOW (undefined) - -/// Throw C++ exception on underflow errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows. -#define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined) - -/// Throw C++ exception on rounding errors. -/// Defining this to 1 causes operations on half-precision values to throw a -/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors. -#define HALF_ERRHANDLING_THROW_INEXACT (undefined) -#endif - -#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT -/// Raise INEXACT exception on overflow. -/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. -/// These will be raised after any possible handling of the underflow exception. -#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 -#endif - -#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT -/// Raise INEXACT exception on underflow. 
-/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. -/// These will be raised after any possible handling of the underflow exception. -/// -/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result -/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result. -#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 -#endif - -/// Default rounding mode. -/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types -/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical -/// functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective -/// constants or the equivalent values of -/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style): -/// -/// `std::float_round_style` | value | rounding -/// ---------------------------------|-------|------------------------- -/// `std::round_indeterminate` | -1 | fastest -/// `std::round_toward_zero` | 0 | toward zero -/// `std::round_to_nearest` | 1 | to nearest (default) -/// `std::round_toward_infinity` | 2 | toward positive infinity -/// `std::round_toward_neg_infinity` | 3 | toward negative infinity -/// -/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even -/// be set to [std::numeric_limits::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize -/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though). -#ifndef HALF_ROUND_STYLE - #define HALF_ROUND_STYLE 1 // = std::round_to_nearest -#endif - -/// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an -/// operation, in particular it just evaluates to positive infinity. -/// -/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL) -#define HUGE_VALH std::numeric_limits::infinity() - -/// Fast half-precision fma function. -/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate -/// half-precision multiplication followed by an addition, which is always the case. -/// -/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma) -#define FP_FAST_FMAH 1 - -/// Half rounding mode. -/// In correspondence with `FLT_ROUNDS` from `` this symbol expands to the rounding mode used for -/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE). 
-/// -/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS) -#define HLF_ROUNDS HALF_ROUND_STYLE - -#ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN -#endif -#ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MAX -#endif -#ifndef FP_SUBNORMAL - #define FP_SUBNORMAL 0 -#endif -#ifndef FP_ZERO - #define FP_ZERO 1 -#endif -#ifndef FP_NAN - #define FP_NAN 2 -#endif -#ifndef FP_INFINITE - #define FP_INFINITE 3 -#endif -#ifndef FP_NORMAL - #define FP_NORMAL 4 -#endif - -#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT) - #define FE_INVALID 0x10 - #define FE_DIVBYZERO 0x08 - #define FE_OVERFLOW 0x04 - #define FE_UNDERFLOW 0x02 - #define FE_INEXACT 0x01 - #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) -#endif - - -/// Main namespace for half-precision functionality. -/// This namespace contains all the functionality provided by the library. -namespace half_float -{ - class half; - -#if HALF_ENABLE_CPP11_USER_LITERALS - /// Library-defined half-precision literals. - /// Import this namespace to enable half-precision floating-point literals: - /// ~~~~{.cpp} - /// using namespace half_float::literal; - /// half_float::half = 4.2_h; - /// ~~~~ - namespace literal - { - half operator "" _h(long double); - } -#endif - - /// \internal - /// \brief Implementation details. - namespace detail - { - #if HALF_ENABLE_CPP11_TYPE_TRAITS - /// Conditional type. - template struct conditional : std::conditional {}; - - /// Helper for tag dispatching. - template struct bool_type : std::integral_constant {}; - using std::true_type; - using std::false_type; - - /// Type traits for floating-point types. - template struct is_float : std::is_floating_point {}; - #else - /// Conditional type. - template struct conditional { typedef T type; }; - template struct conditional { typedef F type; }; - - /// Helper for tag dispatching. - template struct bool_type {}; - typedef bool_type true_type; - typedef bool_type false_type; - - /// Type traits for floating-point types. - template struct is_float : false_type {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - #endif - - /// Type traits for floating-point bits. - template struct bits { typedef unsigned char type; }; - template struct bits : bits {}; - template struct bits : bits {}; - template struct bits : bits {}; - - #if HALF_ENABLE_CPP11_CSTDINT - /// Unsigned integer of (at least) 16 bits width. - typedef std::uint_least16_t uint16; - - /// Fastest unsigned integer of (at least) 32 bits width. - typedef std::uint_fast32_t uint32; - - /// Fastest signed integer of (at least) 32 bits width. - typedef std::int_fast32_t int32; - - /// Unsigned integer of (at least) 32 bits width. - template<> struct bits { typedef std::uint_least32_t type; }; - - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef std::uint_least64_t type; }; - #else - /// Unsigned integer of (at least) 16 bits width. - typedef unsigned short uint16; - - /// Fastest unsigned integer of (at least) 32 bits width. - typedef unsigned long uint32; - - /// Fastest unsigned integer of (at least) 32 bits width. - typedef long int32; - - /// Unsigned integer of (at least) 32 bits width. 
- template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; - - #if HALF_ENABLE_CPP11_LONG_LONG - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; - #else - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef unsigned long type; }; - #endif - #endif - - #ifdef HALF_ARITHMETIC_TYPE - /// Type to use for arithmetic computations and mathematic functions internally. - typedef HALF_ARITHMETIC_TYPE internal_t; - #endif - - /// Tag type for binary construction. - struct binary_t {}; - - /// Tag for binary construction. - HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - - /// \name Implementation defined classification and arithmetic - /// \{ - - /// Check for infinity. - /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if infinity - /// \retval false else - template bool builtin_isinf(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); - #elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); - #else - return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); - #endif - } - - /// Check for NaN. - /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if not a number - /// \retval false else - template bool builtin_isnan(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); - #elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; - #else - return arg != arg; - #endif - } - - /// Check sign. - /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if signbit set - /// \retval false else - template bool builtin_signbit(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); - #else - return arg < T() || (arg == T() && T(1)/arg < T()); - #endif - } - - /// Platform-independent sign mask. - /// \param arg integer value in two's complement - /// \retval -1 if \a arg negative - /// \retval 0 if \a arg positive - inline uint32 sign_mask(uint32 arg) - { - static const int N = std::numeric_limits::digits - 1; - #if HALF_TWOS_COMPLEMENT_INT - return static_cast(arg) >> N; - #else - return -((arg>>N)&1); - #endif - } - - /// Platform-independent arithmetic right shift. - /// \param arg integer value in two's complement - /// \param i shift amount (at most 31) - /// \return \a arg right shifted for \a i bits with possible sign extension - inline uint32 arithmetic_shift(uint32 arg, int i) - { - #if HALF_TWOS_COMPLEMENT_INT - return static_cast(arg) >> i; - #else - return static_cast(arg)/(static_cast(1)<>(std::numeric_limits::digits-1))&1); - #endif - } - - /// \} - /// \name Error handling - /// \{ - - /// Internal exception flags. - /// \return reference to global exception flags - inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } - - /// Raise floating-point exception. 
- /// \param flags exceptions to raise - /// \param cond condition to raise exceptions for - inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) - { - #if HALF_ERRHANDLING - if(!cond) - return; - #if HALF_ERRHANDLING_FLAGS - errflags() |= flags; - #endif - #if HALF_ERRHANDLING_ERRNO - if(flags & FE_INVALID) - errno = EDOM; - else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW)) - errno = ERANGE; - #endif - #if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV - std::feraiseexcept(flags); - #endif - #ifdef HALF_ERRHANDLING_THROW_INVALID - if(flags & FE_INVALID) - throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID); - #endif - #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO - if(flags & FE_DIVBYZERO) - throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO); - #endif - #ifdef HALF_ERRHANDLING_THROW_OVERFLOW - if(flags & FE_OVERFLOW) - throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW); - #endif - #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW - if(flags & FE_UNDERFLOW) - throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW); - #endif - #ifdef HALF_ERRHANDLING_THROW_INEXACT - if(flags & FE_INEXACT) - throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT); - #endif - #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT - if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT)) - raise(FE_INEXACT); - #endif - #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT - if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT)) - raise(FE_INEXACT); - #endif - #endif - } - - /// Check and signal for any NaN. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \retval true if either \a x or \a y is NaN - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN - inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00); - #endif - return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; - } - - /// Signal and silence signaling NaN. - /// \param nan half-precision NaN value - /// \return quiet NaN - /// \exception FE_INVALID if \a nan is signaling NaN - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, !(nan&0x200)); - #endif - return nan | 0x200; - } - - /// Signal and silence signaling NaNs. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \return quiet NaN - /// \exception FE_INVALID if \a x or \a y is signaling NaN - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200))); - #endif - return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); - } - - /// Signal and silence signaling NaNs. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \param z third half-precision value to check - /// \return quiet NaN - /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200))); - #endif - return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); - } - - /// Select value or signaling NaN. 
- /// \param x preferred half-precision value - /// \param y ignored half-precision value except for signaling NaN - /// \return \a y if signaling NaN, \a x otherwise - /// \exception FE_INVALID if \a y is signaling NaN - inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) - { - #if HALF_ERRHANDLING - return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x; - #else - return x; - #endif - } - - /// Raise domain error and return NaN. - /// return quiet NaN - /// \exception FE_INVALID - inline HALF_CONSTEXPR_NOERR unsigned int invalid() - { - #if HALF_ERRHANDLING - raise(FE_INVALID); - #endif - return 0x7FFF; - } - - /// Raise pole error and return infinity. - /// \param sign half-precision value with sign bit only - /// \return half-precision infinity with sign of \a sign - /// \exception FE_DIVBYZERO - inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_DIVBYZERO); - #endif - return sign | 0x7C00; - } - - /// Check value for underflow. - /// \param arg non-zero half-precision value to check - /// \return \a arg - /// \exception FE_UNDERFLOW if arg is subnormal - inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) - { - #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT - raise(FE_UNDERFLOW, !(arg&0x7C00)); - #endif - return arg; - } - - /// \} - /// \name Conversion and rounding - /// \{ - - /// Half-precision overflow. - /// \tparam R rounding mode to use - /// \param sign half-precision value with sign bit only - /// \return rounded overflowing half-precision value - /// \exception FE_OVERFLOW - template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_OVERFLOW); - #endif - return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) : - (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) : - (R==std::round_toward_zero) ? (sign|0x7BFF) : - (sign|0x7C00); - } - - /// Half-precision underflow. - /// \tparam R rounding mode to use - /// \param sign half-precision value with sign bit only - /// \return rounded underflowing half-precision value - /// \exception FE_UNDERFLOW - template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_UNDERFLOW); - #endif - return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) : - (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) : - sign; - } - - /// Round half-precision number. - /// \tparam R rounding mode to use - /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results - /// \param value finite half-precision number to round - /// \param g guard bit (most significant discarded bit) - /// \param s sticky bit (or of all but the most significant discarded bits) - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded or \a I is `true` - template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) - { - #if HALF_ERRHANDLING - value += (R==std::round_to_nearest) ? (g&(s|value)) : - (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) : - (R==std::round_toward_neg_infinity) ? 
((value>>15)&(g|s)) : 0; - if((value&0x7C00) == 0x7C00) - raise(FE_OVERFLOW); - else if(value & 0x7C00) - raise(FE_INEXACT, I || (g|s)!=0); - else - raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0); - return value; - #else - return (R==std::round_to_nearest) ? (value+(g&(s|value))) : - (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) : - (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) : - value; - #endif - } - - /// Round half-precision number to nearest integer value. - /// \tparam R rounding mode to use - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it - /// \param value half-precision value to round - /// \return half-precision bits for nearest integral value - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded and \a I is `true` - template unsigned int integral(unsigned int value) - { - unsigned int abs = value & 0x7FFF; - if(abs < 0x3C00) - { - raise(FE_INEXACT, I); - return ((R==std::round_to_nearest) ? (0x3C00&-static_cast(abs>=(0x3800+E))) : - (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) : - (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast(value>0x8000)) : - 0) | (value&0x8000); - } - if(abs >= 0x6400) - return (abs>0x7C00) ? signal(value) : value; - unsigned int exp = 25 - (abs>>10), mask = (1<>exp)&E)) : - (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) : - (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) : - 0) + value) & ~mask; - } - - /// Convert fixed point to half-precision floating-point. - /// \tparam R rounding mode to use - /// \tparam F number of fractional bits in [11,31] - /// \tparam S `true` for signed, `false` for unsigned - /// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F - /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results - /// \param m mantissa in Q1.F fixed point format - /// \param exp biased exponent - 1 - /// \param sign half-precision value with sign bit only - /// \param s sticky bit (or of all but the most significant already discarded bits) - /// \return value converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded or \a I is `true` - template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) - { - if(S) - { - uint32 msign = sign_mask(m); - m = (m^msign) - msign; - sign = msign & 0x8000; - } - if(N) - for(; m<(static_cast(1)<(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast(1)<<(F-11-exp))-1))!=0)); - return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); - } - - /// Convert IEEE single-precision to half-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). - /// \tparam R rounding mode to use - /// \param value single-precision value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int float2half_impl(float value, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), - (R==std::round_to_nearest) ? 
_MM_FROUND_TO_NEAREST_INT : - (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO : - (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF : - (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF : - _MM_FROUND_CUR_DIRECTION)); - #else - bits::type fbits; - std::memcpy(&fbits, &value, sizeof(float)); - #if 1 - unsigned int sign = (fbits>>16) & 0x8000; - fbits &= 0x7FFFFFFF; - if(fbits >= 0x7F800000) - return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0); - if(fbits >= 0x47800000) - return overflow(sign); - if(fbits >= 0x38800000) - return rounded(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0); - if(fbits >= 0x33000000) - { - int i = 125 - (fbits>>23); - fbits = (fbits&0x7FFFFF) | 0x800000; - return rounded(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast(1)<(sign); - return sign; - #else - static const uint16 base_table[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, - 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, - 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, - 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, - 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 }; - static const unsigned char shift_table[256] = { - 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; - int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp]; - fbits &= 0x7FFFFF; - uint32 m = (fbits|((exp!=0)<<23)) & -static_cast(exp!=0xFF); - return rounded(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast(1)<<(i-1))-1)&m)!=0); - #endif - #endif - } - - /// Convert IEEE double-precision to half-precision. 
- /// \tparam R rounding mode to use - /// \param value double-precision value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int float2half_impl(double value, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - if(R == std::round_indeterminate) - return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION)); - #endif - bits::type dbits; - std::memcpy(&dbits, &value, sizeof(double)); - uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF; - unsigned int sign = (hi>>16) & 0x8000; - hi &= 0x7FFFFFFF; - if(hi >= 0x7FF00000) - return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0); - if(hi >= 0x40F00000) - return overflow(sign); - if(hi >= 0x3F100000) - return rounded(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0); - if(hi >= 0x3E600000) - { - int i = 1018 - (hi>>20); - hi = (hi&0xFFFFF) | 0x100000; - return rounded(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast(1)<(sign); - return sign; - } - - /// Convert non-IEEE floating-point to half-precision. - /// \tparam R rounding mode to use - /// \tparam T source type (builtin floating-point type) - /// \param value floating-point value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int float2half_impl(T value, ...) - { - unsigned int hbits = static_cast(builtin_signbit(value)) << 15; - if(value == T()) - return hbits; - if(builtin_isnan(value)) - return hbits | 0x7FFF; - if(builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if(exp > 16) - return overflow(hbits); - if(exp < -13) - value = std::ldexp(value, 25); - else - { - value = std::ldexp(value, 12-exp); - hbits |= ((exp+13)<<10); - } - T ival, frac = std::modf(value, &ival); - int m = std::abs(static_cast(ival)); - return rounded(hbits+(m>>1), m&1, frac!=T()); - } - - /// Convert floating-point to half-precision. - /// \tparam R rounding mode to use - /// \tparam T source type (builtin floating-point type) - /// \param value floating-point value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int float2half(T value) - { - return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - /// Convert integer to half-precision floating-point. - /// \tparam R rounding mode to use - /// \tparam T type to convert (builtin integer type) - /// \param value integral value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int int2half(T value) - { - unsigned int bits = static_cast(value<0) << 15; - if(!value) - return bits; - if(bits) - value = -value; - if(value > 0xFFFF) - return overflow(bits); - unsigned int m = static_cast(value), exp = 24; - for(; m<0x400; m<<=1,--exp) ; - for(; m>0x7FF; m>>=1,++exp) ; - bits |= (exp<<10) + m; - return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; - } - - /// Convert half-precision to IEEE single-precision. 
- /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). - /// \param value half-precision value to convert - /// \return single-precision value - inline float half2float_impl(unsigned int value, float, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); - #else - #if 0 - bits::type fbits = static_cast::type>(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - fbits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,fbits-=0x800000) ; - fbits += static_cast::type>(abs) << 13; - } - #else - static const bits::type mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, - 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, - 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, - 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 
0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, - 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, - 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, - 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, - 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, - 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, - 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, - 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, - 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 
0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, - 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, - 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, - 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, - 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, - 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, - 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, - 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, - 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, - 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, - 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, - 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, - 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 
0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, - 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, - 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, - 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, - 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, - 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, - 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, - 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, - 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 
0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, - 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, - 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, - 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, - 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, - 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, - 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, - 
0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, - 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, - 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, - 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, - 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 
0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, - 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, - 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, - 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, - 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, - 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, - 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 
0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, - 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, - 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; - static const bits::type exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; - bits::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; - #endif - float out; - std::memcpy(&out, &fbits, sizeof(float)); - return out; - #endif - } - - /// Convert half-precision to IEEE double-precision. - /// \param value half-precision value to convert - /// \return double-precision value - inline double half2float_impl(unsigned int value, double, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value)))); - #else - uint32 hi = static_cast(value&0x8000) << 16; - unsigned int abs = value & 0x7FFF; - if(abs) - { - hi |= 0x3F000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,hi-=0x100000) ; - hi += static_cast(abs) << 10; - } - bits::type dbits = static_cast::type>(hi) << 32; - double out; - std::memcpy(&out, &dbits, sizeof(double)); - return out; - #endif - } - - /// Convert half-precision to non-IEEE floating-point. - /// \tparam T type to convert to (builtin integer type) - /// \param value half-precision value to convert - /// \return floating-point value - template T half2float_impl(unsigned int value, T, ...) - { - T out; - unsigned int abs = value & 0x7FFF; - if(abs > 0x7C00) - out = (std::numeric_limits::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits::signaling_NaN() : - std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); - else if(abs == 0x7C00) - out = std::numeric_limits::has_infinity ? 
std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max(); - else if(abs > 0x3FF) - out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), (abs>>10)-25); - else - out = std::ldexp(static_cast<T>(abs), -24); - return (value&0x8000) ? -out : out; - } - - /// Convert half-precision to floating-point. - /// \tparam T type to convert to (builtin floating-point type) - /// \param value half-precision value to convert - /// \return floating-point value - template<typename T> T half2float(unsigned int value) - { - return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>()); - } - - /// Convert half-precision floating-point to integer. - /// \tparam R rounding mode to use - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it - /// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value half-precision value to convert - /// \return rounded integer value - /// \exception FE_INVALID if value is not representable in type \a T - /// \exception FE_INEXACT if value had to be rounded and \a I is `true` - template<std::float_round_style R,bool E,bool I,typename T> T half2int(unsigned int value) - { - unsigned int abs = value & 0x7FFF; - if(abs >= 0x7C00) - { - raise(FE_INVALID); - return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); - } - if(abs < 0x3800) - { - raise(FE_INEXACT, I); - return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) : - (R==std::round_toward_neg_infinity) ? -T(value>0x8000) : - T(); - } - int exp = 25 - (abs>>10); - unsigned int m = (value&0x3FF) | 0x400; - int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+( - (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) : - (R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) : - (R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp)); - if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 && - ((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max())))) - raise(FE_INVALID); - else if(I && exp > 0 && (m&((1<<exp)-1))) - raise(FE_INEXACT); - return static_cast<T>((value&0x8000) ? -i : i); - } - - /// \} - /// \name Mathematics - /// \{ - - /// upper part of 64-bit multiplication. - /// \tparam R rounding mode to use - /// \param x first factor - /// \param y second factor - /// \return upper 32 bit of \a x * \a y - template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y) - { - uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); - return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) + - ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); - } - - /// 64-bit multiplication. - /// \param x first factor - /// \param y second factor - /// \return upper 32 bit of \a x * \a y rounded to nearest - inline uint32 multiply64(uint32 x, uint32 y) - { - #if HALF_ENABLE_CPP11_LONG_LONG - return static_cast<uint32>((static_cast<unsigned long long>(x)*static_cast<unsigned long long>(y)+0x80000000)>>32); - #else - return mulhi<std::round_to_nearest>(x, y); - #endif - } - - /// 64-bit division.
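// The branch-based inverse of the table lookup above, as a self-contained
// sketch (illustrative, not this header's API). The conversion is exact,
// since every half value is representable as a float.
#include <cstdint>
#include <cstring>

inline float f16_to_f32(std::uint16_t h)
{
    std::uint32_t sign = static_cast<std::uint32_t>(h&0x8000) << 16;
    std::uint32_t abs = h & 0x7FFF, bits;
    if(abs >= 0x7C00)                                                // Inf / NaN: widen payload
        bits = sign | 0x7F800000 | (static_cast<std::uint32_t>(abs&0x3FF)<<13);
    else if(abs >= 0x400)                                            // normal: rebias 15 -> 127
        bits = sign | ((abs+((127-15)<<10))<<13);
    else if(abs)                                                     // subnormal: renormalize
    {
        int shift = 0;
        for(; abs<0x400; abs<<=1,++shift) ;                          // same loop shape as above
        bits = sign | (static_cast<std::uint32_t>(113-shift)<<23) | ((abs&0x3FF)<<13);
    }
    else
        bits = sign;                                                 // +/- zero
    float out; std::memcpy(&out, &bits, sizeof out);
    return out;
}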
- /// \param x upper 32 bit of dividend - /// \param y divisor - /// \param s variable to store sticky bit for rounding - /// \return (\a x << 32) / \a y - inline uint32 divide64(uint32 x, uint32 y, int &s) - { - #if HALF_ENABLE_CPP11_LONG_LONG - unsigned long long xx = static_cast(x) << 32; - return s = (xx%y!=0), static_cast(xx/y); - #else - y >>= 1; - uint32 rem = x, div = 0; - for(unsigned int i=0; i<32; ++i) - { - div <<= 1; - if(rem >= y) - { - rem -= y; - div |= 1; - } - rem <<= 1; - } - return s = rem > 1, div; - #endif - } - - /// Half precision positive modulus. - /// \tparam Q `true` to compute full quotient, `false` else - /// \tparam R `true` to compute signed remainder, `false` for positive remainder - /// \param x first operand as positive finite half-precision value - /// \param y second operand as positive finite half-precision value - /// \param quo adress to store quotient at, `nullptr` if \a Q `false` - /// \return modulus of \a x / \a y - template unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) - { - unsigned int q = 0; - if(x > y) - { - int absx = x, absy = y, expx = 0, expy = 0; - for(; absx<0x400; absx<<=1,--expx) ; - for(; absy<0x400; absy<<=1,--expy) ; - expx += absx >> 10; - expy += absy >> 10; - int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; - for(int d=expx-expy; d; --d) - { - if(!Q && mx == my) - return 0; - if(mx >= my) - { - mx -= my; - q += Q; - } - mx <<= 1; - q <<= static_cast(Q); - } - if(!Q && mx == my) - return 0; - if(mx >= my) - { - mx -= my; - ++q; - } - if(Q) - { - q &= (1<<(std::numeric_limits::digits-1)) - 1; - if(!mx) - return *quo = q, 0; - } - for(; mx<0x400; mx<<=1,--expy) ; - x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy)); - } - if(R) - { - unsigned int a, b; - if(y < 0x800) - { - a = (x<0x400) ? (x<<1) : (x+0x400); - b = y; - } - else - { - a = x; - b = y - 0x400; - } - if(a > b || (a == b && (q&1))) - { - int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF); - int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d)); - for(; m<0x800 && exp>1; m<<=1,--exp) ; - x = 0x8000 + ((exp-1)<<10) + (m>>1); - q += Q; - } - } - if(Q) - *quo = q; - return x; - } - - /// Fixed point square root. - /// \tparam F number of fractional bits - /// \param r radicand in Q1.F fixed point format - /// \param exp exponent - /// \return square root as Q1.F/2 - template uint32 sqrt(uint32 &r, int &exp) - { - int i = exp & 1; - r <<= i; - exp = (exp-i) / 2; - uint32 m = 0; - for(uint32 bit=static_cast(1)<>=2) - { - if(r < m+bit) - m >>= 1; - else - { - r -= m + bit; - m = (m>>1) + bit; - } - } - return m; - } - - /// Fixed point binary exponential. - /// This uses the BKM algorithm in E-mode. - /// \param m exponent in [0,1) as Q0.31 - /// \param n number of iterations (at most 32) - /// \return 2 ^ \a m as Q1.31 - inline uint32 exp2(uint32 m, unsigned int n = 32) - { - static const uint32 logs[] = { - 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, - 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, - 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, - 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; - if(!m) - return 0x80000000; - uint32 mx = 0x80000000, my = 0; - for(unsigned int i=1; i> i; - } - } - return mx; - } - - /// Fixed point binary logarithm. - /// This uses the BKM algorithm in L-mode. 
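// A quick numeric check of the E-mode BKM routine above (ours; exp2 here is
// the Q0.31 -> Q1.31 helper in this namespace, not std::exp2): feeding it
// m = 0.5 should reproduce 2^0.5 = sqrt(2) ~ 1.41421356 to roughly n bits.
inline double bkm_exp2_of_half()
{
    uint32 r = exp2(0x40000000, 32);   // 0.5 as Q0.31; result is Q1.31 in [1,2)
    return r / 2147483648.0;           // r / 2^31
}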
- /// \param m mantissa in [1,2) as Q1.30 - /// \param n number of iterations (at most 32) - /// \return log2(\a m) as Q0.31 - inline uint32 log2(uint32 m, unsigned int n = 32) - { - static const uint32 logs[] = { - 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, - 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, - 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, - 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; - if(m == 0x40000000) - return 0; - uint32 mx = 0x40000000, my = 0; - for(unsigned int i=1; i>i); - if(mz <= m) - { - mx = mz; - my += logs[i]; - } - } - return my; - } - - /// Fixed point sine and cosine. - /// This uses the CORDIC algorithm in rotation mode. - /// \param mz angle in [-pi/2,pi/2] as Q1.30 - /// \param n number of iterations (at most 31) - /// \return sine and cosine of \a mz as Q1.30 - inline std::pair sincos(uint32 mz, unsigned int n = 31) - { - static const uint32 angles[] = { - 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55, - 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, - 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080, - 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 }; - uint32 mx = 0x26DD3B6A, my = 0; - for(unsigned int i=0; i0x3FF)<<10); - int exp = (abs>>10) + (abs<=0x3FF) - 15; - if(abs < 0x3A48) - return k = 0, m << (exp+20); - #if HALF_ENABLE_CPP11_LONG_LONG - unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi; - uint32 sign = -static_cast(f>>63); - k = static_cast(yi>>(62-exp)); - return (multiply64(static_cast((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign; - #else - uint32 yh = m*0xA2F98 + mulhi(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF; - uint32 mask = (static_cast(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast(yi>yh); - k = static_cast(yi>>(30-exp)); - uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign; - return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign; - #endif - } - - /// Get arguments for atan2 function. - /// \param abs half-precision floating-point value - /// \return \a abs and sqrt(1 - \a abs^2) as Q0.30 - inline std::pair atan2_args(unsigned int abs) - { - int exp = -15; - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my; - int rexp = 2 * exp; - r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast(1)<<-rexp)-1))!=0)) : 1); - for(rexp=0; r<0x40000000; r<<=1,--rexp) ; - uint32 mx = sqrt<30>(r, rexp); - int d = exp - rexp; - if(d < 0) - return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx); - if(d > 0) - return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? 
mx : ((mx<<(14-d))+(r<<(13-d))/mx))); - return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); - } - - /// Get exponentials for hyperbolic computation - /// \param abs half-precision floating-point value - /// \param exp variable to take unbiased exponent of larger result - /// \param n number of BKM iterations (at most 32) - /// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent - inline std::pair hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) - { - uint32 mx = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; - int e = (abs>>10) + (abs<=0x3FF); - if(e < 14) - { - exp = 0; - mx >>= 14 - e; - } - else - { - exp = mx >> (45-e); - mx = (mx<<(e-14)) & 0x7FFFFFFF; - } - mx = exp2(mx, n); - int d = exp << 1, s; - if(mx > 0x80000000) - { - my = divide64(0x80000000, mx, s); - my |= s; - ++d; - } - else - my = mx; - return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast(1)< unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) - { - if(esign) - { - exp = -exp - (m!=0); - if(exp < -25) - return underflow(sign); - else if(exp == -25) - return rounded(sign, 1, m!=0); - } - else if(exp > 15) - return overflow(sign); - if(!m) - return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp)); - m = exp2(m, n); - int s = 0; - if(esign) - m = divide64(0x80000000, m, s); - return fixed2half(m, exp+14, sign, s); - } - - /// Postprocessing for binary logarithm. - /// \tparam R rounding mode to use - /// \tparam L logarithm for base transformation as Q1.31 - /// \param m fractional part of logarithm as Q0.31 - /// \param ilog signed integer part of logarithm - /// \param exp biased exponent of result - /// \param sign sign bit of result - /// \return value base-transformed and converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred - template unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) - { - uint32 msign = sign_mask(ilog); - m = (((static_cast(ilog)<<27)+(m>>4))^msign) - msign; - if(!m) - return 0; - for(; m<0x80000000; m<<=1,--exp) ; - int i = m >= L, s; - exp += i; - m >>= 1 + i; - sign ^= msign & 0x8000; - if(exp < -11) - return underflow(sign); - m = divide64(m, L, s); - return fixed2half(m, exp, sign, 1); - } - - /// Hypotenuse square root and postprocessing. - /// \tparam R rounding mode to use - /// \param r mantissa as Q2.30 - /// \param exp biased exponent - /// \return square root converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded - template unsigned int hypot_post(uint32 r, int exp) - { - int i = r >> 31; - if((exp+=i) > 46) - return overflow(); - if(exp < -34) - return underflow(); - r = (r>>i) | (r&i); - uint32 m = sqrt<30>(r, exp+=15); - return fixed2half(m, exp-1, 0, r!=0); - } - - /// Division and postprocessing for tangents. 
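// Exercising the CORDIC sincos above (ours; per its doc comment the returned
// pair is (sine, cosine) of a Q1.30 angle, both in Q1.30): pi/6 should give
// sin ~ 0.5 and cos ~ 0.8660254.
inline std::pair<double,double> cordic_sincos_pi_over_6()
{
    uint32 mz = static_cast<uint32>(3.14159265358979323846/6 * 1073741824.0 + 0.5); // pi/6 as Q1.30
    std::pair<uint32,uint32> sc = sincos(mz, 31);
    return std::make_pair(sc.first/1073741824.0, sc.second/1073741824.0);           // / 2^30
}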
- /// \tparam R rounding mode to use - /// \param my dividend as Q1.31 - /// \param mx divisor as Q1.31 - /// \param exp biased exponent of result - /// \param sign sign bit of result - /// \return quotient converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred - template unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) - { - int i = my >= mx, s; - exp += i; - if(exp > 29) - return overflow(sign); - if(exp < -11) - return underflow(sign); - uint32 m = divide64(my>>(i+1), mx, s); - return fixed2half(m, exp, sign, s); - } - - /// Area function and postprocessing. - /// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`. - /// \tparam R rounding mode to use - /// \tparam S `true` for asinh, `false` for acosh - /// \param arg half-precision argument - /// \return asinh|acosh(\a arg) converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred - template unsigned int area(unsigned int arg) - { - int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; - uint32 mx = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r; - for(; abs<0x400; abs<<=1,--expy) ; - expy += abs >> 10; - r = ((abs&0x3FF)|0x400) << 5; - r *= r; - i = r >> 31; - expy = 2*expy + i; - r >>= i; - if(S) - { - if(expy < 0) - { - r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast(1)<<-expy)-1))!=0)) : 1); - expy = 0; - } - else - { - r += 0x40000000 >> expy; - i = r >> 31; - r = (r>>i) | (r&i); - expy += i; - } - } - else - { - r -= 0x40000000 >> expy; - for(; r<0x40000000; r<<=1,--expy) ; - } - my = sqrt<30>(r, expy); - my = (my<<15) + (r<<14)/my; - if(S) - { - mx >>= expy - expx; - ilog = expy; - } - else - { - my >>= expx - expy; - ilog = expx; - } - my += mx; - i = my >> 31; - static const int G = S && (R==std::round_to_nearest); - return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast(S)<<15)); - } - - /// Class for 1.31 unsigned floating-point computation - struct f31 - { - /// Constructor. - /// \param mant mantissa as 1.31 - /// \param e exponent - HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {} - - /// Constructor. - /// \param abs unsigned half-precision value - f31(unsigned int abs) : exp(-15) - { - for(; abs<0x400; abs<<=1,--exp) ; - m = static_cast((abs&0x3FF)|0x400) << 21; - exp += (abs>>10); - } - - /// Addition operator. - /// \param a first operand - /// \param b second operand - /// \return \a a + \a b - friend f31 operator+(f31 a, f31 b) - { - if(b.exp > a.exp) - std::swap(a, b); - int d = a.exp - b.exp; - uint32 m = a.m + ((d<32) ? (b.m>>d) : 0); - int i = (m&0xFFFFFFFF) < a.m; - return f31(((m+i)>>i)|0x80000000, a.exp+i); - } - - /// Subtraction operator. - /// \param a first operand - /// \param b second operand - /// \return \a a - \a b - friend f31 operator-(f31 a, f31 b) - { - int d = a.exp - b.exp, exp = a.exp; - uint32 m = a.m - ((d<32) ? (b.m>>d) : 0); - if(!m) - return f31(0, -32); - for(; m<0x80000000; m<<=1,--exp) ; - return f31(m, exp); - } - - /// Multiplication operator. 
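// Worked example of this 1.31 format (illustrative): 1.0 is f31(0x80000000, 0)
// and 1.5 is f31(0xC0000000, 0). In operator+ their mantissa sum 0x140000000
// wraps to 0x40000000, the wrap is detected via (m&0xFFFFFFFF) < a.m, and the
// result renormalizes to f31(0xA0000000, 1) = 1.25 * 2^1 = 2.5; every operator
// here keeps the mantissa in [0x80000000, 0xFFFFFFFF], i.e. in [1,2).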
- /// \param a first operand - /// \param b second operand - /// \return \a a * \a b - friend f31 operator*(f31 a, f31 b) - { - uint32 m = multiply64(a.m, b.m); - int i = m >> 31; - return f31(m<<(1-i), a.exp + b.exp + i); - } - - /// Division operator. - /// \param a first operand - /// \param b second operand - /// \return \a a / \a b - friend f31 operator/(f31 a, f31 b) - { - int i = a.m >= b.m, s; - uint32 m = divide64((a.m+i)>>i, b.m, s); - return f31(m, a.exp - b.exp + i - 1); - } - - uint32 m; ///< mantissa as 1.31. - int exp; ///< exponent. - }; - - /// Error function and postprocessing. - /// This computes the value directly in Q1.31 using the approximations given - /// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions). - /// \tparam R rounding mode to use - /// \tparam C `true` for complementary error function, `false` else - /// \param arg half-precision function argument - /// \return approximated value of error function in half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred - template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg) - { - unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; - f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t; - f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t / - ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 22), x2.m>>(31-x2.exp))); - return (!C || sign) ? fixed2half(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) : - (e.exp<-25) ? underflow<R>() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1); - } - - /// Gamma function and postprocessing. - /// This approximates the value of either the gamma function or its logarithm directly in Q1.31. - /// \tparam R rounding mode to use - /// \tparam L `true` for logarithm of gamma function, `false` for gamma function - /// \param arg half-precision floating-point value - /// \return lgamma/tgamma(\a arg) in half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if \a arg is not a positive integer - template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg) - { -/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 }; - double t = arg + 4.65, s = p[0]; - for(unsigned int i=0; i<5; ++i) - s += p[i+1] / (arg+i); - return std::log(s) + (arg-0.5)*std::log(t) - t; -*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0); - unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; - bool bsign = sign != 0; - f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s = - f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1)) - + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1)); - int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16); - s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe; - if(x.exp != -1 || x.m != 0x80000000) - { - i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8); - f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe; - s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l); - } - s = x.exp ?
(s-t) : (t-s); - if(bsign) - { - if(z.exp >= 0) - { - sign &= (L|((z.m>>(31-z.exp))&1)) - 1; - for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ; - } - if(z.exp == -1) - z = f31(0x80000000, 0) - z; - if(z.exp < -1) - { - z = z * pi; - z.m = sincos(z.m>>(1-z.exp), 30).first; - for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ; - } - else - z = f31(0x80000000, 0); - } - if(L) - { - if(bsign) - { - f31 l(0x92868247, 0); - if(z.exp < 0) - { - uint32 m = log2((z.m+1)>>1, 27); - z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5); - for(; z.m<0x80000000; z.m<<=1,--z.exp) ; - l = l + z / lbe; - } - sign = static_cast(x.exp&&(l.exp(x.exp==0) << 15; - if(s.exp < -24) - return underflow<R>(sign); - if(s.exp > 15) - return overflow<R>(sign); - } - } - else - { - s = s * lbe; - uint32 m; - if(s.exp < 0) - { - m = s.m >> -s.exp; - s.exp = 0; - } - else - { - m = (s.m<>(31-s.exp)); - } - s.m = exp2(m, 27); - if(!x.exp) - s = f31(0x80000000, 0) / s; - if(bsign) - { - if(z.exp < 0) - s = s * z; - s = pi / s; - if(s.exp < -24) - return underflow<R>(sign); - } - else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1))) - return ((s.exp+14)<<10) + (s.m>>21); - if(s.exp > 15) - return overflow<R>(sign); - } - return fixed2half(s.m, s.exp+14, sign); - } - /// \} - - template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster; - } - - /// Half-precision floating-point type. - /// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic - /// operators and conversions. It is implicitly convertible to single-precision floating-point, which causes arithmetic - /// expressions and functions with mixed-type operands to be evaluated in the most precise operand type. - /// - /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and - /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which - /// means it can be standard-conformantly copied using raw binary copies. In this context, a few more words about the - /// actual size of the type are in order. Although the half represents an IEEE 16-bit type, it does not necessarily - /// have to be exactly 16 bits in size. On any reasonable implementation, however, the actual binary representation of - /// this type will most probably not involve any additional "magic" or padding beyond the simple binary representation - /// of the underlying 16-bit IEEE number, even if not strictly guaranteed by the standard. Even then it only has an - /// actual size of 16 bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width, which - /// should be the case on nearly any reasonable platform. - /// - /// So unless your C++ implementation is totally exotic or imposes special alignment requirements, it is a reasonable - /// assumption that the data of a half consists of just the 2 bytes of the underlying IEEE representation. - class half - { - public: - /// \name Construction and assignment - /// \{ - - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - - /// Conversion constructor.
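// Usage sketch (illustrative): the type behaves like a builtin scalar.
//     half_float::half a(3.5f), b(1.25f);
//     a += b;                  // compound assignment defined below
//     float f = a;             // implicit widening conversion
// f compares equal to 4.75f: all three values are exactly representable in
// half, so no rounding occurs anywhere in this snippet.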
+    /// \param rhs float to convert
+    /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+    explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
+
+    /// Conversion to single-precision.
+    /// \return single precision value representing expression value
+    operator float() const { return detail::half2float<float>(data_); }
+
+    /// Assignment operator.
+    /// \param rhs single-precision value to copy from
+    /// \return reference to this half
+    /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+    half& operator=(float rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
+
+    /// \}
+    /// \name Arithmetic updates
+    /// \{
+
+    /// Arithmetic assignment.
+    /// \param rhs half expression to add
+    /// \return reference to this half
+    /// \exception FE_... according to operator+(half,half)
+    half& operator+=(half rhs) { return *this = *this + rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs half expression to subtract
+    /// \return reference to this half
+    /// \exception FE_... according to operator-(half,half)
+    half& operator-=(half rhs) { return *this = *this - rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs half expression to multiply with
+    /// \return reference to this half
+    /// \exception FE_... according to operator*(half,half)
+    half& operator*=(half rhs) { return *this = *this * rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs half expression to divide by
+    /// \return reference to this half
+    /// \exception FE_... according to operator/(half,half)
+    half& operator/=(half rhs) { return *this = *this / rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to add
+    /// \return reference to this half
+    /// \exception FE_... according to operator=()
+    half& operator+=(float rhs) { return *this = *this + rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to subtract
+    /// \return reference to this half
+    /// \exception FE_... according to operator=()
+    half& operator-=(float rhs) { return *this = *this - rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to multiply with
+    /// \return reference to this half
+    /// \exception FE_... according to operator=()
+    half& operator*=(float rhs) { return *this = *this * rhs; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to divide by
+    /// \return reference to this half
+    /// \exception FE_... according to operator=()
+    half& operator/=(float rhs) { return *this = *this / rhs; }
+
+    /// \}
+    /// \name Increment and decrement
+    /// \{
+
+    /// Prefix increment.
+    /// \return incremented half value
+    /// \exception FE_... according to operator+(half,half)
+    half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
+
+    /// Prefix decrement.
+    /// \return decremented half value
+    /// \exception FE_... according to operator-(half,half)
+    half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
+
+    /// Postfix increment.
+    /// \return non-incremented half value
+    /// \exception FE_... according to operator+(half,half)
+    half operator++(int) { half out(*this); ++*this; return out; }
+
+    /// Postfix decrement.
+    /// \return non-decremented half value
+    /// \exception FE_... according to operator-(half,half)
+    half operator--(int) { half out(*this); --*this; return out; }
+    /// \}
+
+  private:
+    /// Rounding mode to use
+    static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+    /// Constructor.
+    /// \param bits binary representation to set half to
+    HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast<detail::uint16>(bits)) {}
+
+    /// Internal binary representation
+    detail::uint16 data_;
+
+  #ifndef HALF_DOXYGEN_ONLY
+    friend HALF_CONSTEXPR_NOERR bool operator==(half, half);
+    friend HALF_CONSTEXPR_NOERR bool operator!=(half, half);
+    friend HALF_CONSTEXPR_NOERR bool operator<(half, half);
+    friend HALF_CONSTEXPR_NOERR bool operator>(half, half);
+    friend HALF_CONSTEXPR_NOERR bool operator<=(half, half);
+    friend HALF_CONSTEXPR_NOERR bool operator>=(half, half);
+    friend HALF_CONSTEXPR half operator-(half);
+    friend half operator+(half, half);
+    friend half operator-(half, half);
+    friend half operator*(half, half);
+    friend half operator/(half, half);
+    template<typename charT,typename traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
+    template<typename charT,typename traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
+    friend HALF_CONSTEXPR half fabs(half);
+    friend half fmod(half, half);
+    friend half remainder(half, half);
+    friend half remquo(half, half, int*);
+    friend half fma(half, half, half);
+    friend HALF_CONSTEXPR_NOERR half fmax(half, half);
+    friend HALF_CONSTEXPR_NOERR half fmin(half, half);
+    friend half fdim(half, half);
+    friend half nanh(const char*);
+    friend half exp(half);
+    friend half exp2(half);
+    friend half expm1(half);
+    friend half log(half);
+    friend half log10(half);
+    friend half log2(half);
+    friend half log1p(half);
+    friend half sqrt(half);
+    friend half rsqrt(half);
+    friend half cbrt(half);
+    friend half hypot(half, half);
+    friend half hypot(half, half, half);
+    friend half pow(half, half);
+    friend void sincos(half, half*, half*);
+    friend half sin(half);
+    friend half cos(half);
+    friend half tan(half);
+    friend half asin(half);
+    friend half acos(half);
+    friend half atan(half);
+    friend half atan2(half, half);
+    friend half sinh(half);
+    friend half cosh(half);
+    friend half tanh(half);
+    friend half asinh(half);
+    friend half acosh(half);
+    friend half atanh(half);
+    friend half erf(half);
+    friend half erfc(half);
+    friend half lgamma(half);
+    friend half tgamma(half);
+    friend half ceil(half);
+    friend half floor(half);
+    friend half trunc(half);
+    friend half round(half);
+    friend long lround(half);
+    friend half rint(half);
+    friend long lrint(half);
+    friend half nearbyint(half);
+  #ifdef HALF_ENABLE_CPP11_LONG_LONG
+    friend long long llround(half);
+    friend long long llrint(half);
+  #endif
+    friend half frexp(half, int*);
+    friend half scalbln(half, long);
+    friend half modf(half, half*);
+    friend int ilogb(half);
+    friend half logb(half);
+    friend half nextafter(half, half);
+    friend half nexttoward(half, long double);
+    friend HALF_CONSTEXPR half copysign(half, half);
+    friend HALF_CONSTEXPR int fpclassify(half);
+    friend HALF_CONSTEXPR bool isfinite(half);
+    friend HALF_CONSTEXPR bool isinf(half);
+    friend HALF_CONSTEXPR bool isnan(half);
+    friend HALF_CONSTEXPR bool isnormal(half);
+    friend HALF_CONSTEXPR bool signbit(half);
+    friend HALF_CONSTEXPR bool isgreater(half, half);
+    friend HALF_CONSTEXPR bool isgreaterequal(half, half);
+    friend HALF_CONSTEXPR bool isless(half, half);
+    friend HALF_CONSTEXPR bool islessequal(half, half);
+    friend HALF_CONSTEXPR bool islessgreater(half, half);
+    template<typename,typename,std::float_round_style> friend struct detail::half_caster;
+    friend class std::numeric_limits<half>;
+  #if HALF_ENABLE_CPP11_HASH
+    friend struct std::hash<half>;
+  #endif
+  #if HALF_ENABLE_CPP11_USER_LITERALS
+    friend half literal::operator "" _h(long double);
+  #endif
+  #endif
+  };
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+  namespace literal
+  {
+    /// Half literal.
+    /// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant
+    /// expressions due to rather involved conversions. So don't expect this to be a literal literal without involving
+    /// conversion operations at runtime. It is a convenience feature, not a performance optimization.
+    /// \param value literal value
+    /// \return half with given value (possibly rounded)
+    /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+    inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
+  }
+#endif
+
+  namespace detail
+  {
+    /// Helper class for half casts.
+    /// This class template has to be specialized for all valid cast arguments to define an appropriate static
+    /// `cast` member function and a corresponding `type` member denoting its return type.
+    /// \tparam T destination type
+    /// \tparam U source type
+    /// \tparam R rounding mode to use
+    template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
+    template<typename U,std::float_round_style R> struct half_caster<half,U,R>
+    {
+    #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+      static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+    #endif
+
+      static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
+
+    private:
+      static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
+      static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
+    };
+    template<typename T,std::float_round_style R> struct half_caster<T,half,R>
+    {
+    #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+      static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+    #endif
+
+      static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
+
+    private:
+      static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
+      static T cast_impl(half arg, false_type) { return half2int<R,T>(arg.data_); }
+    };
+    template<std::float_round_style R> struct half_caster<half,half,R>
+    {
+      static half cast(half arg) { return arg; }
+    };
+  }
+}
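The pieces above are easiest to see in use. A minimal editorial sketch (assuming the public `half_cast` wrapper around `detail::half_caster`, which this header declares further down):

    using half_float::half;
    using namespace half_float::literal;

    half a = 3.5_h;                       // literal, rounded at runtime (not a constant expression)
    int  i = half_float::half_cast<int>(a);                            // integer cast via half_caster<int,half>
    half t = half_float::half_cast<half,std::round_toward_zero>(0.1);  // per-cast rounding mode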
+/// Extensions to the C++ standard library.
+namespace std
+{
+  /// Numeric limits for half-precision floats.
+  /// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
+  template<> class numeric_limits<half_float::half>
+  {
+  public:
+    /// Is template specialization.
+    static HALF_CONSTEXPR_CONST bool is_specialized = true;
+
+    /// Supports signed values.
+    static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+    /// Is not an integer type.
+    static HALF_CONSTEXPR_CONST bool is_integer = false;
+
+    /// Is not exact.
+    static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+    /// Doesn't provide modulo arithmetic.
+    static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+    /// Has a finite set of values.
+    static HALF_CONSTEXPR_CONST bool is_bounded = true;
+
+    /// IEEE conformant.
+    static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+    /// Supports infinity.
+    static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+    /// Supports quiet NaNs.
+    static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+    /// Supports signaling NaNs.
+    static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true;
+
+    /// Supports subnormal values.
+    static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+    /// Supports no denormalization detection.
+    static HALF_CONSTEXPR_CONST bool has_denorm_loss = false;
+
+  #if HALF_ERRHANDLING_THROWS
+    static HALF_CONSTEXPR_CONST bool traps = true;
+  #else
+    /// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is activated.
+    static HALF_CONSTEXPR_CONST bool traps = false;
+  #endif
+
+    /// Does not support pre-rounding underflow detection.
+    static HALF_CONSTEXPR_CONST bool tinyness_before = false;
+
+    /// Rounding mode.
+    static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style;
+
+    /// Significant digits.
+    static HALF_CONSTEXPR_CONST int digits = 11;
+
+    /// Significant decimal digits.
+    static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+    /// Required decimal digits to represent all possible values.
+    static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+    /// Number base.
+    static HALF_CONSTEXPR_CONST int radix = 2;
+
+    /// One more than smallest exponent.
+    static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+    /// Smallest normalized representable power of 10.
+    static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+    /// One more than largest exponent.
+    static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+    /// Largest finitely representable power of 10.
+    static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+    /// Smallest positive normal value.
+    static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); }
+
+    /// Smallest finite value.
+    static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); }
+
+    /// Largest finite value.
+    static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); }
+
+    /// Difference between 1 and next representable value.
+    static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); }
+
+    /// Maximum rounding error in ULP (units in the last place).
+    static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
+    { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
+
+    /// Positive infinity.
+    static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); }
+
+    /// Quiet NaN.
+    static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); }
+
+    /// Signaling NaN.
+    static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); }
+
+    /// Smallest positive subnormal value.
+    static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); }
+  };
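To put numbers to the traits above, a small sketch (the values follow directly from the bit patterns used in the accessors):

    #include <iostream>
    #include <limits>
    int main()
    {
        typedef std::numeric_limits<half_float::half> lim;
        std::cout << lim::max()     << '\n'   // 65504          (0x7BFF)
                  << lim::min()     << '\n'   // 2^-14 ~ 6.1e-5 (0x0400)
                  << lim::epsilon() << '\n'   // 2^-10 ~ 9.8e-4 (0x1400)
                  << lim::digits10  << '\n';  // 3
    }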
+#if HALF_ENABLE_CPP11_HASH
+  /// Hash function for half-precision floats.
+  /// This is only defined if C++11 `std::hash` is supported and enabled.
+  ///
+  /// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash)
+  template<> struct hash<half_float::half>
+  {
+    /// Type of function argument.
+    typedef half_float::half argument_type;
+
+    /// Function return type.
+    typedef size_t result_type;
+
+    /// Compute hash function.
+    /// \param arg half to hash
+    /// \return hash value
+    result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
+  };
+#endif
+}
+
+namespace half_float
+{
+  /// \anchor compop
+  /// \name Comparison operators
+  /// \{
+
+  /// Comparison for equality.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if operands equal
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y)
+  {
+    return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
+  }
+
+  /// Comparison for inequality.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if operands not equal
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y)
+  {
+    return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF));
+  }
+
+  /// Comparison for less than.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x less than \a y
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y)
+  {
+    return !detail::compsignal(x.data_, y.data_) &&
+      ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+  }
+
+  /// Comparison for greater than.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x greater than \a y
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y)
+  {
+    return !detail::compsignal(x.data_, y.data_) &&
+      ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+  }
+
+  /// Comparison for less equal.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x less equal \a y
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y)
+  {
+    return !detail::compsignal(x.data_, y.data_) &&
+      ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+  }
+
+  /// Comparison for greater equal.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x greater equal \a y
+  /// \retval false else
+  /// \exception FE_INVALID if \a x or \a y is NaN
+  inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y)
+  {
+    return !detail::compsignal(x.data_, y.data_) &&
+      ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+  }
+
+  /// \}
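A sketch of the comparison semantics this implements: signed zeros compare equal, and any comparison involving a NaN signals FE_INVALID:

    using half_float::half;
    half pz(0.0f), nz(-0.0f);
    bool e = (pz == nz);                              // true: +0 and -0 compare equal
    half qn = std::numeric_limits<half>::quiet_NaN();
    bool l = (qn < pz);                               // false, raises FE_INVALID
    bool u = (qn != qn);                              // true: NaN is unequal to everything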
+  /// \anchor arithmetics
+  /// \name Arithmetic operators
+  /// \{
+
+  /// Identity.
+  /// \param arg operand
+  /// \return unchanged operand
+  inline HALF_CONSTEXPR half operator+(half arg) { return arg; }
+
+  /// Negation.
+  /// \param arg operand
+  /// \return negated operand
+  inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
+
+  /// Addition.
+  /// This operation is exact to rounding for all rounding modes.
+  /// \param x left operand
+  /// \param y right operand
+  /// \return sum of half expressions
+  /// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half operator+(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)+detail::half2float<detail::internal_t>(y.data_)));
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;
+    bool sub = ((x.data_^y.data_)&0x8000) != 0;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
+        (sub && absx==0x7C00) ? detail::invalid() : y.data_);
+    if(!absx)
+      return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
+    if(!absy)
+      return x;
+    unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;
+    if(absy > absx)
+      std::swap(absx, absy);
+    int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;
+    if(d < 13)
+    {
+      my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
+      my = (my>>d) | ((my&((1<<d)-1))!=0);
+    }
+    else
+      my = 1;
+    if(sub)
+    {
+      if(!(mx-=my))
+        return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+      for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;
+    }
+    else
+    {
+      mx += my;
+      int i = mx >> 14;
+      if((exp+=i) > 30)
+        return half(detail::binary, detail::overflow<half::round_style>(sign));
+      mx = (mx>>i) | (mx&i);
+    }
+    return half(detail::binary, detail::rounded(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));
+  #endif
+  }
+
+  /// Subtraction.
+  /// This operation is exact to rounding for all rounding modes.
+  /// \param x left operand
+  /// \param y right operand
+  /// \return difference of half expressions
+  /// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half operator-(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)-detail::half2float<detail::internal_t>(y.data_)));
+  #else
+    return x + -y;
+  #endif
+  }
+
+  /// Multiplication.
+  /// This operation is exact to rounding for all rounding modes.
+  /// \param x left operand
+  /// \param y right operand
+  /// \return product of half expressions
+  /// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half operator*(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)*detail::half2float<detail::internal_t>(y.data_)));
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
+    unsigned int sign = (x.data_^y.data_) & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
+    if(!absx || !absy)
+      return half(detail::binary, sign);
+    for(; absx<0x400; absx<<=1,--exp) ;
+    for(; absy<0x400; absy<<=1,--exp) ;
+    detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+    int i = m >> 21, s = m & i;
+    exp += (absx>>10) + (absy>>10) + i;
+    if(exp > 29)
+      return half(detail::binary, detail::overflow<half::round_style>(sign));
+    else if(exp < -11)
+      return half(detail::binary, detail::underflow<half::round_style>(sign));
+    return half(detail::binary, detail::fixed2half(m>>i, exp, sign, s));
+  #endif
+  }
+
+  /// Division.
+  /// This operation is exact to rounding for all rounding modes.
+  /// \param x left operand
+  /// \param y right operand
+  /// \return quotient of half expressions
+  /// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
+  /// \exception FE_DIVBYZERO if dividing finite value by 0
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half operator/(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)/detail::half2float<detail::internal_t>(y.data_)));
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
+    unsigned int sign = (x.data_^y.data_) & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
+    if(!absx)
+      return half(detail::binary, absy ? sign : detail::invalid());
+    if(!absy)
+      return half(detail::binary, detail::pole(sign));
+    for(; absx<0x400; absx<<=1,--exp) ;
+    for(; absy<0x400; absy<<=1,++exp) ;
+    detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+    int i = mx < my;
+    exp += (absx>>10) - (absy>>10) - i;
+    if(exp > 29)
+      return half(detail::binary, detail::overflow<half::round_style>(sign));
+    else if(exp < -11)
+      return half(detail::binary, detail::underflow<half::round_style>(sign));
+    mx <<= 12 + i;
+    my <<= 1;
+    return half(detail::binary, detail::fixed2half(mx/my, exp, sign, mx%my!=0));
+  #endif
+  }
+
+  /// \}
+  /// \anchor streaming
+  /// \name Input and output
+  /// \{
+
+  /// Output operator.
+  /// This uses the built-in functionality for streaming out floating-point numbers.
+  /// \param out output stream to write into
+  /// \param arg half expression to write
+  /// \return reference to output stream
+  template<typename charT,typename traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return out << detail::half2float<detail::internal_t>(arg.data_);
+  #else
+    return out << detail::half2float<float>(arg.data_);
+  #endif
+  }
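Each operator rounds its exact result once, so behaviour at the range limits is IEEE-like; a quick sketch:

    #include <iostream>
    using half_float::half;
    half x(1.0f), y(3.0f);
    half q = x / y;            // ~0.33325, correctly rounded half division
    std::cout << q << '\n';    // streamed via the conversion above
    half big(60000.0f);
    half s = big + big;        // exceeds 65504: +infinity plus FE_OVERFLOW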
+  /// Input operator.
+  /// This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating
+  /// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first
+  /// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded
+  /// to half-precision using the library's half-precision rounding mode.
+  /// \param in input stream to read from
+  /// \param arg half to read into
+  /// \return reference to input stream
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  template<typename charT,typename traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    detail::internal_t f;
+  #else
+    double f;
+  #endif
+    if(in >> f)
+      arg.data_ = detail::float2half<half::round_style>(f);
+    return in;
+  }
+
+  /// \}
+  /// \anchor basic
+  /// \name Basic mathematical operations
+  /// \{
+
+  /// Absolute value.
+  /// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+  /// \param arg operand
+  /// \return absolute value of \a arg
+  inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
+
+  /// Absolute value.
+  /// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+  /// \param arg operand
+  /// \return absolute value of \a arg
+  inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); }
+
+  /// Remainder of division.
+  /// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
+  /// \param x first operand
+  /// \param y second operand
+  /// \return remainder of floating-point division.
+  /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+  inline half fmod(half x, half y)
+  {
+    unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        (absx==0x7C00) ? detail::invalid() : x.data_);
+    if(!absy)
+      return half(detail::binary, detail::invalid());
+    if(!absx)
+      return x;
+    if(absx == absy)
+      return half(detail::binary, sign);
+    return half(detail::binary, sign|detail::mod(absx, absy));
+  }
+
+  /// Remainder of division.
+  /// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
+  /// \param x first operand
+  /// \param y second operand
+  /// \return remainder of floating-point division.
+  /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+  inline half remainder(half x, half y)
+  {
+    unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        (absx==0x7C00) ? detail::invalid() : x.data_);
+    if(!absy)
+      return half(detail::binary, detail::invalid());
+    if(absx == absy)
+      return half(detail::binary, sign);
+    return half(detail::binary, sign^detail::mod(absx, absy));
+  }
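The difference between the two remainder flavours above, sketched:

    using half_float::half;
    half m = fmod(half(5.5f), half(2.0f));       // 1.5, sign follows the dividend
    half r = remainder(half(5.5f), half(2.0f));  // -0.5, quotient rounded to nearest (3)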
+  /// Remainder of division.
+  /// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
+  /// \param x first operand
+  /// \param y second operand
+  /// \param quo address to store some bits of quotient at
+  /// \return remainder of floating-point division.
+  /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+  inline half remquo(half x, half y, int *quo)
+  {
+    unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
+    if(!absy)
+      return half(detail::binary, detail::invalid());
+    bool qsign = ((value^y.data_)&0x8000) != 0;
+    int q = 1;
+    if(absx != absy)
+      value ^= detail::mod(absx, absy, &q);
+    return *quo = qsign ? -q : q, half(detail::binary, value);
+  }
+
+  /// Fused multiply add.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
+  /// \param x first operand
+  /// \param y second operand
+  /// \param z third operand
+  /// \return ( \a x * \a y ) + \a z rounded as one operation.
+  /// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+  inline half fma(half x, half y, half z)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+  #if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA
+    return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
+  #else
+    return half(detail::binary, detail::float2half<half::round_style>(fx*fy+fz));
+  #endif
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
+    unsigned int sign = (x.data_^y.data_) & 0x8000;
+    bool sub = ((sign^z.data_)&0x8000) != 0;
+    if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+      return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
+        (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
+        (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
+    if(!absx || !absy)
+      return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
+    for(; absx<0x400; absx<<=1,--exp) ;
+    for(; absy<0x400; absy<<=1,--exp) ;
+    detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+    int i = m >> 21;
+    exp += (absx>>10) + (absy>>10) + i;
+    m <<= 3 - i;
+    if(absz)
+    {
+      int expz = 0;
+      for(; absz<0x400; absz<<=1,--expz) ;
+      expz += absz >> 10;
+      detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
+      if(expz > exp || (expz == exp && mz > m))
+      {
+        std::swap(m, mz);
+        std::swap(exp, expz);
+        if(sub)
+          sign = z.data_ & 0x8000;
+      }
+      int d = exp - expz;
+      mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+      if(sub)
+      {
+        if(!(m-=mz))
+          return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+        for(; m<0x800000; m<<=1,--exp) ;
+      }
+      else
+      {
+        m += mz;
+        i = m >> 24;
+        m = (m>>i) | (m&i);
+        exp += i;
+      }
+    }
+    if(exp > 30)
+      return half(detail::binary, detail::overflow<half::round_style>(sign));
+    else if(exp < -10)
+      return half(detail::binary, detail::underflow<half::round_style>(sign));
+    return half(detail::binary, detail::fixed2half(m, exp-1, sign));
+  #endif
+  }
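Because fma() rounds x*y + z only once, it can differ from the two-step expression in the last place; a concrete sketch (round-to-nearest):

    using half_float::half;
    half x(0.1f), y(10.0f), z(-1.0f);   // x stores 0.0999755859375
    half f = fma(x, y, z);              // -0.000244140625: exact product minus 1
    half g = x * y + z;                 // 0: x*y ties and rounds up to exactly 1.0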
+  /// Maximum of half expressions.
+  /// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
+  /// \param x first operand
+  /// \param y second operand
+  /// \return maximum of operands, ignoring quiet NaNs
+  /// \exception FE_INVALID if \a x or \a y is signaling NaN
+  inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
+  {
+    return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
+      (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+  }
+
+  /// Minimum of half expressions.
+  /// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
+  /// \param x first operand
+  /// \param y second operand
+  /// \return minimum of operands, ignoring quiet NaNs
+  /// \exception FE_INVALID if \a x or \a y is signaling NaN
+  inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
+  {
+    return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
+      (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+  }
+
+  /// Positive difference.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
+  /// \param x first operand
+  /// \param y second operand
+  /// \return \a x - \a y or 0 if difference negative
+  /// \exception FE_... according to operator-(half,half)
+  inline half fdim(half x, half y)
+  {
+    if(isnan(x) || isnan(y))
+      return half(detail::binary, detail::signal(x.data_, y.data_));
+    return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
+  }
+
+  /// Get NaN value.
+  /// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
+  /// \param arg string code
+  /// \return quiet NaN
+  inline half nanh(const char *arg)
+  {
+    unsigned int value = 0x7FFF;
+    while(*arg)
+      value ^= static_cast<unsigned>(*arg++) & 0xFF;
+    return half(detail::binary, value);
+  }
+
+  /// \}
+  /// \anchor exponential
+  /// \name Exponential functions
+  /// \{
+
+  /// Exponential function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
+  /// \param arg function argument
+  /// \return e raised to \a arg
+  /// \exception FE_INVALID for signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half exp(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::exp(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp;
+    if(!abs)
+      return half(detail::binary, 0x3C00);
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+    if(abs >= 0x4C80)
+      return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+    detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+    if(e < 14)
+    {
+      exp = 0;
+      m >>= 14 - e;
+    }
+    else
+    {
+      exp = m >> (45-e);
+      m = (m<<(e-14)) & 0x7FFFFFFF;
+    }
+    return half(detail::binary, detail::exp2_post<half::round_style>(m, exp, (arg.data_&0x8000)!=0, 0, 26));
+  #endif
+  }
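The overflow threshold above is easy to hit, since e^x outgrows the 65504 maximum quickly; a sketch:

    using half_float::half;
    half a = exp(half(11.0f));   // ~59872, still finite
    half b = exp(half(11.1f));   // +infinity, raises FE_OVERFLOW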
+  /// Binary exponential.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
+  /// \param arg function argument
+  /// \return 2 raised to \a arg
+  /// \exception FE_INVALID for signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half exp2(half arg)
+  {
+  #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::exp2(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
+    if(!abs)
+      return half(detail::binary, 0x3C00);
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+    if(abs >= 0x4E40)
+      return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+    return half(detail::binary, detail::exp2_post<half::round_style>(
+      (static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28));
+  #endif
+  }
+
+  /// Exponential minus one.
+  /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+  /// and in <1% of inputs for any other rounding mode.
+  ///
+  /// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
+  /// \param arg function argument
+  /// \return e raised to \a arg and subtracted by 1
+  /// \exception FE_INVALID for signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half expm1(half arg)
+  {
+  #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::expm1(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp;
+    if(!abs)
+      return arg;
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
+    if(abs >= 0x4A00)
+      return half(detail::binary, (arg.data_&0x8000) ? detail::rounded(0xBBFF, 1, 1) : detail::overflow<half::round_style>());
+    detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+    if(e < 14)
+    {
+      exp = 0;
+      m >>= 14 - e;
+    }
+    else
+    {
+      exp = m >> (45-e);
+      m = (m<<(e-14)) & 0x7FFFFFFF;
+    }
+    m = detail::exp2(m);
+    if(sign)
+    {
+      int s = 0;
+      if(m > 0x80000000)
+      {
+        ++exp;
+        m = detail::divide64(0x80000000, m, s);
+      }
+      m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
+      exp = 0;
+    }
+    else
+      m = (exp<31) ? ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)) : 1;
+    for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
+    if(exp > 29)
+      return half(detail::binary, detail::overflow<half::round_style>());
+    return half(detail::binary, detail::rounded(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
+  #endif
+  }
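What expm1() buys over exp() for small arguments, sketched:

    using half_float::half;
    half t(0.0005f);
    half u = expm1(t);              // ~0.0005, full precision preserved
    half v = exp(t) - half(1.0f);   // 0.000977: cancellation leaves ~2x error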
+  /// Natural logarithm.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
+  /// \param arg function argument
+  /// \return logarithm of \a arg to base e
+  /// \exception FE_INVALID for signaling NaN or negative argument
+  /// \exception FE_DIVBYZERO for 0
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half log(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::log(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = -15;
+    if(!abs)
+      return half(detail::binary, detail::pole(0x8000));
+    if(arg.data_ & 0x8000)
+      return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs >= 0x7C00)
+      return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+    for(; abs<0x400; abs<<=1,--exp) ;
+    exp += abs >> 10;
+    return half(detail::binary, detail::log2_post(
+      detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
+  #endif
+  }
+
+  /// Common logarithm.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
+  /// \param arg function argument
+  /// \return logarithm of \a arg to base 10
+  /// \exception FE_INVALID for signaling NaN or negative argument
+  /// \exception FE_DIVBYZERO for 0
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half log10(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::log10(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = -15;
+    if(!abs)
+      return half(detail::binary, detail::pole(0x8000));
+    if(arg.data_ & 0x8000)
+      return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs >= 0x7C00)
+      return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+    switch(abs)
+    {
+      case 0x4900: return half(detail::binary, 0x3C00);
+      case 0x5640: return half(detail::binary, 0x4000);
+      case 0x63D0: return half(detail::binary, 0x4200);
+      case 0x70E2: return half(detail::binary, 0x4400);
+    }
+    for(; abs<0x400; abs<<=1,--exp) ;
+    exp += abs >> 10;
+    return half(detail::binary, detail::log2_post(
+      detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
+  #endif
+  }
+
+  /// Binary logarithm.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
+  /// \param arg function argument
+  /// \return logarithm of \a arg to base 2
+  /// \exception FE_INVALID for signaling NaN or negative argument
+  /// \exception FE_DIVBYZERO for 0
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half log2(half arg)
+  {
+  #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::log2(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
+    if(!abs)
+      return half(detail::binary, detail::pole(0x8000));
+    if(arg.data_ & 0x8000)
+      return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs >= 0x7C00)
+      return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+    if(abs == 0x3C00)
+      return half(detail::binary, 0);
+    for(; abs<0x400; abs<<=1,--exp) ;
+    exp += (abs>>10);
+    if(!(abs&0x3FF))
+    {
+      unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
+      for(exp=18; m<0x400; m<<=1,--exp) ;
+      return half(detail::binary, value+(exp<<10)+m);
+    }
+    detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m =
+      (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
+    if(!m)
+      return half(detail::binary, 0);
+    for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
+    for(; m>0xFFFFFFF; m>>=1,++exp)
+      s |= m & 1;
+    return half(detail::binary, detail::fixed2half(m, exp, sign&0x8000, s));
+  #endif
+  }
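Exactly representable cases take the integral fast path in log2() above; a sketch:

    using half_float::half;
    half p = log2(half(8.0f));    // exactly 3.0
    half q = log2(half(10.0f));   // ~3.322, correctly rounded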
+  /// Natural logarithm plus one.
+  /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+  /// and in ~1% of inputs for any other rounding mode.
+  ///
+  /// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
+  /// \param arg function argument
+  /// \return logarithm of \a arg plus 1 to base e
+  /// \exception FE_INVALID for signaling NaN or argument <-1
+  /// \exception FE_DIVBYZERO for -1
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half log1p(half arg)
+  {
+  #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::log1p(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    if(arg.data_ >= 0xBC00)
+      return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+    int abs = arg.data_ & 0x7FFF, exp = -15;
+    if(!abs || abs >= 0x7C00)
+      return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+    for(; abs<0x400; abs<<=1,--exp) ;
+    exp += abs >> 10;
+    detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
+    if(arg.data_ & 0x8000)
+    {
+      m = 0x40000000 - (m>>-exp);
+      for(exp=0; m<0x40000000; m<<=1,--exp) ;
+    }
+    else
+    {
+      if(exp < 0)
+      {
+        m = 0x40000000 + (m>>-exp);
+        exp = 0;
+      }
+      else
+      {
+        m += 0x40000000 >> exp;
+        int i = m >> 31;
+        m >>= i;
+        exp += i;
+      }
+    }
+    return half(detail::binary, detail::log2_post(detail::log2(m), exp, 17));
+  #endif
+  }
+
+  /// \}
+  /// \anchor power
+  /// \name Power functions
+  /// \{
+
+  /// Square root.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
+  /// \param arg function argument
+  /// \return square root of \a arg
+  /// \exception FE_INVALID for signaling NaN and negative arguments
+  /// \exception FE_INEXACT according to rounding
+  inline half sqrt(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = 15;
+    if(!abs || arg.data_ >= 0x7C00)
+      return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
+    for(; abs<0x400; abs<<=1,--exp) ;
+    detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
+    return half(detail::binary, detail::rounded((exp<<10)+(m&0x3FF), r>m, r!=0));
+  #endif
+  }
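sqrt() is one of the half operations that is always exact to rounding; a sketch:

    using half_float::half;
    half r = sqrt(half(2.0f));    // 1.4140625, nearest half to sqrt(2)
    half n = sqrt(half(-1.0f));   // NaN, raises FE_INVALID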
+  /// Inverse square root.
+  /// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing
+  /// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
+  /// \param arg function argument
+  /// \return reciprocal of square root of \a arg
+  /// \exception FE_INVALID for signaling NaN and negative arguments
+  /// \exception FE_INEXACT according to rounding
+  inline half rsqrt(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(detail::internal_t(1)/std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000;
+    if(!abs || arg.data_ >= 0x7C00)
+      return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ?
+        detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0);
+    for(; abs<0x400; abs<<=1,bias-=0x400) ;
+    unsigned int frac = (abs+=bias) & 0x7FF;
+    if(frac == 0x400)
+      return half(detail::binary, 0x7A00-(abs>>1));
+    if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) ||
+       (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B)))
+      return pow(arg, half(detail::binary, 0xB800));
+    detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my;
+    int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21;
+    for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+    i = (my*=mz>>10) >> 31;
+    expy += i;
+    my = (my>>(20+i)) + 1;
+    i = (mz=my*my) >> 21;
+    for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+    i = (my*=(mz>>10)+1) >> 31;
+    return half(detail::binary, detail::fixed2half(my>>i, expy+i+14));
+  #endif
+  }
+
+  /// Cubic root.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
+  /// \param arg function argument
+  /// \return cubic root of \a arg
+  /// \exception FE_INVALID for signaling NaN
+  /// \exception FE_INEXACT according to rounding
+  inline half cbrt(half arg)
+  {
+  #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::cbrt(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = -15;
+    if(!abs || abs == 0x3C00 || abs >= 0x7C00)
+      return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+    for(; abs<0x400; abs<<=1, --exp);
+    detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m =
+      (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
+    for(exp=2; m<0x80000000; m<<=1,--exp) ;
+    m = detail::multiply64(m, 0xAAAAAAAB);
+    int i = m >> 31, s;
+    exp += i;
+    m <<= 1 - i;
+    if(exp < 0)
+    {
+      f = m >> -exp;
+      exp = 0;
+    }
+    else
+    {
+      f = (m<<exp) & 0x7FFFFFFF;
+      exp = m >> (31-exp);
+    }
+    m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
+    if(sign)
+    {
+      if(m > 0x80000000)
+      {
+        m = detail::divide64(0x80000000, m, s);
+        ++exp;
+      }
+      exp = -exp;
+    }
+    return half(detail::binary, (half::round_style==std::round_to_nearest) ?
+      detail::fixed2half(m, exp+14, arg.data_&0x8000) :
+      detail::fixed2half((m+0x80)>>8, exp+14, arg.data_&0x8000));
+  #endif
+  }
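The point of a dedicated rsqrt() is the single rounding step; a sketch:

    using half_float::half;
    half a(0.5f);
    half r1 = rsqrt(a);              // one correctly rounded operation, ~1.414
    half r2 = half(1.0f) / sqrt(a);  // two roundings, may be off by 1 ulp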
+  /// Hypotenuse function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+  /// \param x first argument
+  /// \param y second argument
+  /// \return square root of sum of squares without internal over- or underflows
+  /// \exception FE_INVALID if \a x or \a y is signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+  inline half hypot(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
+  #if HALF_ENABLE_CPP11_CMATH
+    return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
+  #else
+    return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy)));
+  #endif
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) :
+        (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
+    if(!absx)
+      return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
+    if(!absy)
+      return half(detail::binary, detail::check_underflow(absx));
+    if(absy > absx)
+      std::swap(absx, absy);
+    for(; absx<0x400; absx<<=1,--expx) ;
+    for(; absy<0x400; absy<<=1,--expy) ;
+    detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+    mx *= mx;
+    my *= my;
+    int ix = mx >> 21, iy = my >> 21;
+    expx = 2*(expx+(absx>>10)) - 15 + ix;
+    expy = 2*(expy+(absy>>10)) - 15 + iy;
+    mx <<= 10 - ix;
+    my <<= 10 - iy;
+    int d = expx - expy;
+    my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+    return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+  #endif
+  }
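The scaled fixed-point evaluation above is what keeps intermediate squares from overflowing; a sketch:

    using half_float::half;
    half h = hypot(half(300.0f), half(400.0f));  // 500.0, even though 300*300
                                                 // alone would overflow half's range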
+  /// Hypotenuse function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+  /// \param x first argument
+  /// \param y second argument
+  /// \param z third argument
+  /// \return square root of sum of squares without internal over- or underflows
+  /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+  inline half hypot(half x, half y, half z)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+    return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
+    if(!absx)
+      return hypot(y, z);
+    if(!absy)
+      return hypot(x, z);
+    if(!absz)
+      return hypot(x, y);
+    if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+      return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
+        (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
+        (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
+        detail::signal(x.data_, y.data_, z.data_));
+    if(absz > absy)
+      std::swap(absy, absz);
+    if(absy > absx)
+      std::swap(absx, absy);
+    if(absz > absy)
+      std::swap(absy, absz);
+    for(; absx<0x400; absx<<=1,--expx) ;
+    for(; absy<0x400; absy<<=1,--expy) ;
+    for(; absz<0x400; absz<<=1,--expz) ;
+    detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
+    mx *= mx;
+    my *= my;
+    mz *= mz;
+    int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
+    expx = 2*(expx+(absx>>10)) - 15 + ix;
+    expy = 2*(expy+(absy>>10)) - 15 + iy;
+    expz = 2*(expz+(absz>>10)) - 15 + iz;
+    mx <<= 10 - ix;
+    my <<= 10 - iy;
+    mz <<= 10 - iz;
+    int d = expy - expz;
+    mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+    my += mz;
+    if(my & 0x80000000)
+    {
+      my = (my>>1) | (my&1);
+      if(++expy > expx)
+      {
+        std::swap(mx, my);
+        std::swap(expx, expy);
+      }
+    }
+    d = expx - expy;
+    my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+    return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+  #endif
+  }
+
+  /// Power function.
+  /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
+  ///
+  /// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
+  /// \param x base
+  /// \param y exponent
+  /// \return \a x raised to \a y
+  /// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite and negative and \a y is finite and not integral
+  /// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half pow(half x, half y)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::pow(detail::half2float<detail::internal_t>(x.data_), detail::half2float<detail::internal_t>(y.data_))));
+  #else
+    int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
+    if(!absy || x.data_ == 0x3C00)
+      return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
+    bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
+    unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+      return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+        (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
+        (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
+    if(!absx)
+      return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);
+    if((x.data_&0x8000) && !is_int)
+      return half(detail::binary, detail::invalid());
+    if(x.data_ == 0xBC00)
+      return half(detail::binary, sign|0x3C00);
+    switch(y.data_)
+    {
+      case 0x3800: return sqrt(x);
+      case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+      case 0x4000: return x * x;
+      case 0xBC00: return half(detail::binary, 0x3C00) / x;
+    }
+    for(; absx<0x400; absx<<=1,--exp) ;
+    detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m =
+      (((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
+    for(exp=-11; m<0x80000000; m<<=1,--exp) ;
+    for(; absy<0x400; absy<<=1,--exp) ;
+    m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
+    int i = m >> 31;
+    exp += (absy>>10) + i;
+    m <<= 1 - i;
+    if(exp < 0)
+    {
+      f = m >> -exp;
+      exp = 0;
+    }
+    else
+    {
+      f = (m<<exp) & 0x7FFFFFFF;
+      exp = m >> (31-exp);
+    }
+    return half(detail::binary, detail::exp2_post<half::round_style>(f, exp, ((msign&1)^(y.data_>>15))!=0, sign));
+  #endif
+  }
+
+  /// \}
+  /// \anchor trigonometric
+  /// \name Trigonometric functions
+  /// \{
+
+  /// Compute sine and cosine simultaneously.
+  /// This returns the same results as sin() and cos() but is faster than calling each function individually.
+  ///
+  /// This function is exact to rounding for all rounding modes.
+  /// \param arg function argument
+  /// \param sin variable to take sine of \a arg
+  /// \param cos variable to take cosine of \a arg
+  /// \exception FE_INVALID for signaling NaN or infinity
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline void sincos(half arg, half *sin, half *cos)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    detail::internal_t f = detail::half2float<detail::internal_t>(arg.data_);
+    *sin = half(detail::binary, detail::float2half<half::round_style>(std::sin(f)));
+    *cos = half(detail::binary, detail::float2half<half::round_style>(std::cos(f)));
+  #else
+    int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k;
+    if(abs >= 0x7C00)
+      *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+    else if(!abs)
+    {
+      *sin = arg;
+      *cos = half(detail::binary, 0x3C00);
+    }
+    else if(abs < 0x2500)
+    {
+      *sin = half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+      *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+    }
+    else
+    {
+      if(half::round_style != std::round_to_nearest)
+      {
+        switch(abs)
+        {
+          case 0x48B7:
+            *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1));
+            *cos = half(detail::binary, detail::rounded(0xBBFF, 1, 1));
+            return;
+          case 0x598C:
+            *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1));
+            *cos = half(detail::binary, detail::rounded(0x80FC, 1, 1));
+            return;
+          case 0x6A64:
+            *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1));
+            *cos = half(detail::binary, detail::rounded(0x27FF, 1, 1));
+            return;
+          case 0x6D8C:
+            *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1));
+            *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+            return;
+        }
+      }
+      std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+      switch(k & 3)
+      {
+        case 1: sc = std::make_pair(sc.second, -sc.first); break;
+        case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+        case 3: sc = std::make_pair(-sc.second, sc.first); break;
+      }
+      *sin = half(detail::binary, detail::fixed2half((sc.first^-static_cast<detail::uint32>(sign))+sign));
+      *cos = half(detail::binary, detail::fixed2half(sc.second));
+    }
+  #endif
+  }
+  /// Sine function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin).
+  /// \param arg function argument
+  /// \return sine value of \a arg
+  /// \exception FE_INVALID for signaling NaN or infinity
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half sin(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::sin(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, k;
+    if(!abs)
+      return arg;
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs < 0x2900)
+      return half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+    if(half::round_style != std::round_to_nearest)
+      switch(abs)
+      {
+        case 0x48B7: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1));
+        case 0x6A64: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1));
+        case 0x6D8C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1));
+      }
+    std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+    detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15));
+    return half(detail::binary, detail::fixed2half((((k&1) ? sc.second : sc.first)^sign) - sign));
+  #endif
+  }
+
+  /// Cosine function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos).
+  /// \param arg function argument
+  /// \return cosine value of \a arg
+  /// \exception FE_INVALID for signaling NaN or infinity
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half cos(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::cos(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, k;
+    if(!abs)
+      return half(detail::binary, 0x3C00);
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs < 0x2500)
+      return half(detail::binary, detail::rounded(0x3BFF, 1, 1));
+    if(half::round_style != std::round_to_nearest && abs == 0x598C)
+      return half(detail::binary, detail::rounded(0x80FC, 1, 1));
+    std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+    detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1);
+    return half(detail::binary, detail::fixed2half((((k&1) ? sc.first : sc.second)^sign) - sign));
+  #endif
+  }
+
+  /// Tangent function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan).
+  /// \param arg function argument
+  /// \return tangent value of \a arg
+  /// \exception FE_INVALID for signaling NaN or infinity
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half tan(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::tan(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    int abs = arg.data_ & 0x7FFF, exp = 13, k;
+    if(!abs)
+      return arg;
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+    if(abs < 0x2700)
+      return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+    if(half::round_style != std::round_to_nearest)
+      switch(abs)
+      {
+        case 0x658C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x07E6, 1, 1));
+        case 0x7330: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x4B62, 1, 1));
+      }
+    std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
+    if(k & 1)
+      sc = std::make_pair(-sc.second, sc.first);
+    detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
+    detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx;
+    for(; my<0x80000000; my<<=1,--exp) ;
+    for(; mx<0x80000000; mx<<=1,++exp) ;
+    return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp, (signy^signx^arg.data_)&0x8000));
+  #endif
+  }
+
+  /// Arc sine.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin).
+  /// \param arg function argument
+  /// \return arc sine value of \a arg
+  /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half asin(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::asin(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+    if(!abs)
+      return arg;
+    if(abs >= 0x3C00)
+      return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+        detail::rounded(sign|0x3E48, 0, 1));
+    if(abs < 0x2900)
+      return half(detail::binary, detail::rounded(arg.data_, 0, 1));
+    if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3))
+      return half(detail::binary, detail::rounded(arg.data_+1, 1, 1));
+    std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs);
+    detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26);
+    return half(detail::binary, detail::fixed2half(m, 14, sign));
+  #endif
+  }
+
+  /// Arc cosine function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos).
+  /// \param arg function argument
+  /// \return arc cosine value of \a arg
+  /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half acos(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::acos(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15;
+    if(!abs)
+      return half(detail::binary, detail::rounded(0x3E48, 0, 1));
+    if(abs >= 0x3C00)
+      return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+        sign ? detail::rounded(0x4248, 0, 1) : 0);
+    std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs);
+    detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
+    return half(detail::binary, detail::fixed2half(sign ? (0xC90FDAA2-m) : m, 15, 0, sign));
+  #endif
+  }
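Domain handling for the inverse functions above, sketched:

    using half_float::half;
    half a = asin(half(0.5f));    // ~0.5234 (pi/6 rounded to half)
    half b = asin(half(1.5f));    // |arg| > 1: NaN, raises FE_INVALID
    half c = acos(half(-1.0f));   // 3.140625, pi rounded to the nearest half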
+  /// Arc tangent function.
+  /// This function is exact to rounding for all rounding modes.
+  ///
+  /// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan).
+  /// \param arg function argument
+  /// \return arc tangent value of \a arg
+  /// \exception FE_INVALID for signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half atan(half arg)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::atan(detail::half2float<detail::internal_t>(arg.data_))));
+  #else
+    unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+    if(!abs)
+      return arg;
+    if(abs >= 0x7C00)
+      return half(detail::binary, (abs==0x7C00) ? detail::rounded(sign|0x3E48, 0, 1) : detail::signal(arg.data_));
+    if(abs <= 0x2700)
+      return half(detail::binary, detail::rounded(arg.data_-1, 1, 1));
+    int exp = (abs>>10) + (abs<=0x3FF);
+    detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10);
+    detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) :
+      detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28);
+    return half(detail::binary, detail::fixed2half(m, 14, sign));
+  #endif
+  }
+
+  /// Arc tangent function.
+  /// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`,
+  /// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode.
+  ///
+  /// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2).
+  /// \param y numerator
+  /// \param x denominator
+  /// \return arc tangent value
+  /// \exception FE_INVALID if \a x or \a y is signaling NaN
+  /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+  inline half atan2(half y, half x)
+  {
+  #ifdef HALF_ARITHMETIC_TYPE
+    return half(detail::binary, detail::float2half<half::round_style>(std::atan2(detail::half2float<detail::internal_t>(y.data_), detail::half2float<detail::internal_t>(x.data_))));
+  #else
+    unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000;
+    if(absx >= 0x7C00 || absy >= 0x7C00)
+    {
+      if(absx > 0x7C00 || absy > 0x7C00)
+        return half(detail::binary, detail::signal(x.data_, y.data_));
+      if(absy == 0x7C00)
+        return half(detail::binary, (absx<0x7C00) ? detail::rounded(signy|0x3E48, 0, 1) :
+          signx ? detail::rounded(signy|0x40B6, 0, 1) :
+          detail::rounded(signy|0x3A48, 0, 1));
+      return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded(signy|0x4248, 0, 1));
+    }
+    if(!absy)
+      return signx ? half(detail::binary, detail::rounded(signy|0x4248, 0, 1)) : y;
+    if(!absx)
+      return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1));
+    int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF);
+    if(d > (signx ? 18 : 12))
+      return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1));
+    if(signx && d < -11)
+      return half(detail::binary, detail::rounded(signy|0x4248, 0, 1));
+    if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9))
+    {
+      for(; absy<0x400; absy<<=1,--d) ;
+      detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800;
+      int i = my < mx;
+      d -= i;
+      if(d < -25)
+        return half(detail::binary, detail::underflow<half::round_style>(signy));
+      my <<= 11 + i;
+      return half(detail::binary, detail::fixed2half(my/mx, d+14, signy, my%mx!=0));
+    }
+    detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)),
+      ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1)));
+    return half(detail::binary, detail::fixed2half(signx ? (0xC90FDAA2-m) : m, 15, signy, signx));
+  #endif
+  }
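Unlike atan(y/x), atan2() uses the signs of both operands to pick the quadrant; a sketch:

    using half_float::half;
    half a = atan2(half(1.0f), half(-1.0f));  // ~2.355 (3*pi/4)
    half b = atan(half(1.0f) / half(-1.0f));  // ~-0.7854, quadrant lost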
(0xC90FDAA2-m) : m, 15, signy, signx)); - #endif - } - - /// \} - /// \anchor hyperbolic - /// \name Hyperbolic functions - /// \{ - - /// Hyperbolic sine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh). - /// \param arg function argument - /// \return hyperbolic sine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half sinh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::sinh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - if(abs <= 0x2900) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27); - detail::uint32 m = mm.first - mm.second; - for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ; - unsigned int sign = arg.data_ & 0x8000; - if(exp > 29) - return half(detail::binary, detail::overflow(sign)); - return half(detail::binary, detail::fixed2half(m, exp, sign)); - #endif - } - - /// Hyperbolic cosine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh). - /// \param arg function argument - /// \return hyperbolic cosine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half cosh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::cosh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00); - std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26); - detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31; - m = (m>>i) | (m&i) | 0x80000000; - if((exp+=13+i) > 29) - return half(detail::binary, detail::overflow()); - return half(detail::binary, detail::fixed2half(m, exp)); - #endif - } - - /// Hyperbolic tangent. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh). - /// \param arg function argument - /// \return hyperbolic tangent value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half tanh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::tanh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ?
detail::signal(arg.data_) : (arg.data_-0x4000)); - if(abs >= 0x4500) - return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); - if(abs < 0x2700) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - if(half::round_style != std::round_to_nearest && abs == 0x2D3F) - return half(detail::binary, detail::rounded(arg.data_-3, 0, 1)); - std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27); - detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31; - for(exp=13; my<0x80000000; my<<=1,--exp) ; - mx = (mx>>i) | 0x80000000; - return half(detail::binary, detail::tangent_post(my, mx, exp-i, arg.data_&0x8000)); - #endif - } - - /// Hyperbolic area sine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh). - /// \param arg function argument - /// \return area sine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half asinh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::asinh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if(!abs || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - if(abs <= 0x2900) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - if(half::round_style != std::round_to_nearest) - switch(abs) - { - case 0x32D4: return half(detail::binary, detail::rounded(arg.data_-13, 1, 1)); - case 0x3B5B: return half(detail::binary, detail::rounded(arg.data_-197, 1, 1)); - } - return half(detail::binary, detail::area(arg.data_)); - #endif - } - - /// Hyperbolic area cosine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh). - /// \param arg function argument - /// \return area cosine value of \a arg - /// \exception FE_INVALID for signaling NaN or arguments <1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half acosh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::acosh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if((arg.data_&0x8000) || abs < 0x3C00) - return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs == 0x3C00) - return half(detail::binary, 0); - if(arg.data_ >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - return half(detail::binary, detail::area(arg.data_)); - #endif - } - - /// Hyperbolic area tangent. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh).
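As a quick illustration of the hyperbolic entry points above, here is a minimal usage sketch (assumptions: the header is reachable as <half/half.hpp> per this patch's external_libs layout, and all functions live in namespace half_float; the identity check is illustrative only, not part of the library):

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half x(0.5f);
      half s = half_float::sinh(x), c = half_float::cosh(x);
      // cosh^2 - sinh^2 == 1 holds only approximately at 11-bit precision.
      std::cout << float(c * c - s * s) << ' '        // close to 1
                << float(half_float::tanh(x)) << '\n'; // ~0.4621
      return 0;
    }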
- /// \param arg function argument - /// \return area tangent value of \a arg - /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 - /// \exception FE_DIVBYZERO for +/-1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half atanh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::atanh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = 0; - if(!abs) - return arg; - if(abs >= 0x3C00) - return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs < 0x2700) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m; - for(; mx<0x80000000; mx<<=1,++exp) ; - int i = my >= mx, s; - return half(detail::binary, detail::log2_post(detail::log2( - (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000)); - #endif - } - - /// \} - /// \anchor special - /// \name Error and gamma functions - /// \{ - - /// Error function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. - /// - /// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf). - /// \param arg function argument - /// \return error function value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half erf(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::erf(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF; - if(!abs || abs >= 0x7C00) - return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg; - if(abs >= 0x4200) - return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); - return half(detail::binary, detail::erf(arg.data_)); - #endif - } - - /// Complementary error function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. - /// - /// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc). - /// \param arg function argument - /// \return 1 minus error function value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half erfc(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::erfc(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(abs >= 0x7C00) - return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x4400) - return half(detail::binary, detail::rounded((sign>>1)-(sign>>15), sign>>15, 1)); - return half(detail::binary, detail::erf(arg.data_)); - #endif - } - - /// Natural logarithm of gamma function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs.
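A sketch of where erfc pays off numerically (same include-path assumption as above): for moderately large arguments, erfc(x) keeps precision that computing 1 - erf(x) in half would cancel away.

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half x(2.0f);
      std::cout << float(half_float::erfc(x)) << ' '        // ~0.00468, near full precision
                << float(half(1.0f) - half_float::erf(x))   // ~0.0049: cancellation near 1
                << '\n';
      return 0;
    }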
- /// - /// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma). - /// \param arg function argument - /// \return natural logarithm of gamma function for \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_DIVBYZERO for 0 or negative integer arguments - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half lgamma(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::lgamma(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); - if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) - return half(detail::binary, detail::pole()); - if(arg.data_ == 0x3C00 || arg.data_ == 0x4000) - return half(detail::binary, 0); - return half(detail::binary, detail::gamma(arg.data_)); - #endif - } - - /// Gamma function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs. - /// - /// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma). - /// \param arg function argument - /// \return gamma function value of \a arg - /// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments - /// \exception FE_DIVBYZERO for 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half tgamma(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::tgamma(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF; - if(!abs) - return half(detail::binary, detail::pole(arg.data_)); - if(abs >= 0x7C00) - return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); - if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) - return half(detail::binary, detail::invalid()); - if(arg.data_ >= 0xCA80) - return half(detail::binary, detail::underflow((1-((abs>>(25-(abs>>10)))&1))<<15)); - if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000)) - return half(detail::binary, detail::overflow()); - if(arg.data_ == 0x3C00) - return arg; - return half(detail::binary, detail::gamma(arg.data_)); - #endif - } - - /// \} - /// \anchor rounding - /// \name Rounding - /// \{ - - /// Nearest integer not less than half value. - /// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil). - /// \param arg half to round - /// \return nearest integer not less than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded - inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - - /// Nearest integer not greater than half value. - /// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor). - /// \param arg half to round - /// \return nearest integer not greater than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded - inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - - /// Nearest integer not greater in magnitude than half value.
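The gamma entry points in action (a minimal sketch under the same assumptions; reference values are the exact mathematical ones, which half can only approximate):

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      // tgamma(0.5) = sqrt(pi) ~= 1.77245; lgamma(0.5) = ln(sqrt(pi)) ~= 0.57236
      std::cout << float(half_float::tgamma(half(0.5f))) << ' '
                << float(half_float::lgamma(half(0.5f))) << '\n';
      return 0;
    }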
- /// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc). - /// \param arg half to round - /// \return nearest integer not greater in magnitude than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded - inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - - /// Nearest integer. - /// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded - inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - - /// Nearest integer. - /// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID if value is not representable as `long` - inline long lround(half arg) { return detail::half2int(arg.data_); } - - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded - inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID if value is not representable as `long` - /// \exception FE_INEXACT if value had to be rounded - inline long lrint(half arg) { return detail::half2int(arg.data_); } - - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID for signaling NaN - inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } -#if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer. - /// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID if value is not representable as `long long` - inline long long llround(half arg) { return detail::half2int(arg.data_); } - - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID if value is not representable as `long long` - /// \exception FE_INEXACT if value had to be rounded - inline long long llrint(half arg) { return detail::half2int(arg.data_); } -#endif - - /// \} - /// \anchor float - /// \name Floating point manipulation - /// \{ - - /// Decompress floating-point number. 
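The rounding family above differs only in rounding direction; a sketch contrasting them on a half-way case (same assumptions as the earlier examples):

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half x(2.5f);
      std::cout << float(half_float::ceil(x))  << ' '    // 3
                << float(half_float::floor(x)) << ' '    // 2
                << float(half_float::trunc(x)) << ' '    // 2
                << float(half_float::round(x)) << ' '    // 3 (half-way rounds away from zero)
                << half_float::lrint(x)        << '\n';  // 2 under the default round-to-nearest-even
      return 0;
    }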
- /// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp). - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return significand in range [0.5, 1) - /// \exception FE_INVALID for signaling NaN - inline half frexp(half arg, int *exp) - { - *exp = 0; - unsigned int abs = arg.data_ & 0x7FFF; - if(abs >= 0x7C00 || !abs) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1,--*exp) ; - *exp += (abs>>10) - 14; - return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF)); - } - - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn). - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multiplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half scalbln(half arg, long exp) - { - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(abs >= 0x7C00 || !abs) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - if(exp > 30) - return half(detail::binary, detail::overflow(sign)); - else if(exp < -10) - return half(detail::binary, detail::underflow(sign)); - else if(exp > 0) - return half(detail::binary, sign|(exp<<10)|(abs&0x3FF)); - unsigned int m = (abs&0x3FF) | 0x400; - return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0)); - } - - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn). - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multiplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half scalbn(half arg, int exp) { return scalbln(arg, exp); } - - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp). - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multiplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - inline half ldexp(half arg, int exp) { return scalbln(arg, exp); } - - /// Extract integer and fractional parts. - /// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf).
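frexp and ldexp are exact inverses of each other, since both only manipulate the exponent field; a round-trip sketch (same assumptions):

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      int e = 0;
      half m = half_float::frexp(half(6.0f), &e);  // m = 0.75, e = 3 (6 = 0.75 * 2^3)
      half r = half_float::ldexp(m, e);            // reassembles 6.0 exactly
      std::cout << float(m) << ' ' << e << ' ' << float(r) << '\n';
      return 0;
    }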
- /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - /// \exception FE_INVALID for signaling NaN - inline half modf(half arg, half *iptr) - { - unsigned int abs = arg.data_ & 0x7FFF; - if(abs > 0x7C00) - { - arg = half(detail::binary, detail::signal(arg.data_)); - return *iptr = arg, arg; - } - if(abs >= 0x6400) - return *iptr = arg, half(detail::binary, arg.data_&0x8000); - if(abs < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if(!m) - return half(detail::binary, arg.data_&0x8000); - for(; m<0x400; m<<=1,--exp) ; - return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF)); - } - - /// Extract exponent. - /// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb). - /// \param arg number to query - /// \return floating-point exponent - /// \retval FP_ILOGB0 for zero - /// \retval FP_ILOGBNAN for NaN - /// \retval INT_MAX for infinity - /// \exception FE_INVALID for 0 or infinite values - inline int ilogb(half arg) - { - int abs = arg.data_ & 0x7FFF, exp; - if(!abs || abs >= 0x7C00) - { - detail::raise(FE_INVALID); - return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN; - } - for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; - return exp; - } - - /// Extract exponent. - /// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb). - /// \param arg number to query - /// \return floating-point exponent - /// \exception FE_INVALID for signaling NaN - /// \exception FE_DIVBYZERO for 0 - inline half logb(half arg) - { - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return half(detail::binary, detail::pole(0x8000)); - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); - for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; - unsigned int value = static_cast<unsigned>(exp<0) << 15; - if(exp) - { - unsigned int m = std::abs(exp) << 6; - for(exp=18; m<0x400; m<<=1,--exp) ; - value |= (exp<<10) + m; - } - return half(detail::binary, value); - } - - /// Next representable value. - /// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter). - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW for infinite result from finite argument - /// \exception FE_UNDERFLOW for subnormal result - inline half nextafter(half from, half to) - { - int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if(fabs > 0x7C00 || tabs > 0x7C00) - return half(detail::binary, detail::signal(from.data_, to.data_)); - if(from.data_ == to.data_ || !(fabs|tabs)) - return to; - if(!fabs) - { - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); - return half(detail::binary, (to.data_&0x8000)+1); - } - unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>( - (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1; - detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00); - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400); - return half(detail::binary, out); - } - - /// Next representable value.
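modf and nextafter together make half's granularity easy to see (sketch, same assumptions): the step from 1.0 to the next representable half is exactly 2^-10.

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half ip;
      half fp = half_float::modf(half(3.75f), &ip);             // ip = 3, fp = 0.75
      half up = half_float::nextafter(half(1.0f), half(2.0f));
      std::cout << float(ip) << ' ' << float(fp) << ' '
                << (float(up) - 1.0f) << '\n';                  // 0.0009765625 = 2^-10
      return 0;
    }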
- /// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward). - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW for infinite result from finite argument - /// \exception FE_UNDERFLOW for subnormal result - inline half nexttoward(half from, long double to) - { - int fabs = from.data_ & 0x7FFF; - if(fabs > 0x7C00) - return half(detail::binary, detail::signal(from.data_)); - long double lfrom = static_cast<long double>(from); - if(detail::builtin_isnan(to) || lfrom == to) - return half(static_cast<float>(to)); - if(!fabs) - { - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); - return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1); - } - unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1; - detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00); - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400); - return half(detail::binary, out); - } - - /// Take sign. - /// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign). - /// \param x value to change sign for - /// \param y value to take sign from - /// \return value equal to \a x in magnitude and to \a y in sign - inline HALF_CONSTEXPR half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); } - - /// \} - /// \anchor classification - /// \name Floating point classification - /// \{ - - /// Classify floating-point value. - /// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify). - /// \param arg number to classify - /// \retval FP_ZERO for positive and negative zero - /// \retval FP_SUBNORMAL for subnormal numbers - /// \retval FP_INFINITE for positive and negative infinity - /// \retval FP_NAN for NaNs - /// \retval FP_NORMAL for all other (normal) values - inline HALF_CONSTEXPR int fpclassify(half arg) - { - return !(arg.data_&0x7FFF) ? FP_ZERO : ((arg.data_&0x7FFF)<0x400) ? FP_SUBNORMAL : ((arg.data_&0x7FFF)<0x7C00) ? FP_NORMAL : ((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE : FP_NAN; - } - - /// Check if finite number. - /// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite). - /// \param arg number to check - /// \retval true if neither infinity nor NaN - /// \retval false else - inline HALF_CONSTEXPR bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; } - - /// Check for infinity. - /// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf). - /// \param arg number to check - /// \retval true for positive or negative infinity - /// \retval false else - inline HALF_CONSTEXPR bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; } - - /// Check for NaN. - /// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan). - /// \param arg number to check - /// \retval true for NaNs - /// \retval false else - inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } - - /// Check if normal number. - /// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal). - /// \param arg number to check - /// \retval true if normal number - /// \retval false if either subnormal, zero, infinity or NaN - inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } - - /// Check sign. - /// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit). - /// \param arg number to check - /// \retval true for negative number - /// \retval false for positive number - inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; } - - /// \} - /// \anchor compfunc - /// \name Comparison - /// \{ - - /// Quiet comparison for greater than. - /// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else - inline HALF_CONSTEXPR bool isgreater(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } - - /// Quiet comparison for greater equal. - /// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else - inline HALF_CONSTEXPR bool isgreaterequal(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } - - /// Quiet comparison for less than. - /// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else - inline HALF_CONSTEXPR bool isless(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } - - /// Quiet comparison for less equal. - /// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal).
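Unlike the built-in relational operators, these quiet predicates stay silent on quiet NaN operands; a minimal sketch (assumes the std::numeric_limits<half> specialization this header provides):

    #include <iostream>
    #include <limits>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half a(1.0f), nan = std::numeric_limits<half>::quiet_NaN();
      // Quiet predicates never raise FE_INVALID on quiet NaNs.
      std::cout << half_float::isgreater(a, nan) << ' '     // 0
                << half_float::isunordered(a, nan) << '\n'; // 1
      return 0;
    }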
- /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else - inline HALF_CONSTEXPR bool islessequal(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } - - /// Quiet comparison for less or greater. - /// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater). - /// \param x first operand - /// \param y second operand - /// \retval true if either less or greater - /// \retval false else - inline HALF_CONSTEXPR bool islessgreater(half x, half y) - { - return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y); - } - - /// Quiet check if unordered. - /// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered). - /// \param x first operand - /// \param y second operand - /// \retval true if unordered (one or two NaN operands) - /// \retval false else - inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); } - - /// \} - /// \anchor casting - /// \name Casting - /// \{ - - /// Cast to or from half-precision floating-point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s returns the argument unmodified. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - template<typename T,typename U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); } - - /// Cast to or from half-precision floating-point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s returns the argument unmodified. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam R rounding mode to use.
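A usage sketch for the cast just defined (same assumptions as before): the point of half_cast is the direct conversion, with no intermediate float rounding step.

    #include <iostream>
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      // double -> half in one rounding step, not double -> float -> half.
      half h = half_float::half_cast<half>(0.1);
      int  i = half_float::half_cast<int>(h);  // rounds to integer with the default mode
      std::cout << float(h) << ' ' << i << '\n';
      return 0;
    }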
- /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding - template<typename T,std::float_round_style R,typename U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); } - /// \} - - /// \} - /// \anchor errors - /// \name Error handling - /// \{ - - /// Clear exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept). - /// \param excepts OR of exceptions to clear - /// \retval 0 all selected flags cleared successfully - inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; } - - /// Test exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept). - /// \param excepts OR of exceptions to test - /// \return OR of selected exceptions if raised - inline int fetestexcept(int excepts) { return detail::errflags() & excepts; } - - /// Raise exception flags. - /// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as - /// configured with the [HALF_ERRHANDLING_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept). - /// \param excepts OR of exceptions to raise - /// \retval 0 all selected exceptions raised successfully - inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; } - - /// Save exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag). - /// \param flagp address to store flag state at - /// \param excepts OR of flags to save - /// \retval 0 for success - inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; } - - /// Restore exception flags. - /// This only copies the specified exception state (including unset flags) without incurring any additional exception handling. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
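A sketch of the flag-query workflow (assumption: HALF_ERRHANDLING_FLAGS is defined before inclusion so half operations record flags automatically; without it, flags only change via the manual functions above):

    #include <iostream>
    #include <cfenv>
    #define HALF_ERRHANDLING_FLAGS 1  // assumed: enable automatic flag recording
    #include <half/half.hpp>

    int main() {
      using half_float::half;
      half_float::feclearexcept(FE_ALL_EXCEPT);
      half big(60000.0f);
      big = big * big;  // exceeds half's max of 65504, sets FE_OVERFLOW
      std::cout << (half_float::fetestexcept(FE_OVERFLOW) ? "overflow" : "ok") << '\n';
      return 0;
    }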
- /// \param flagp address to take flag state from - /// \param excepts OR of flags to restore - /// \retval 0 for success - inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; } - - /// Throw C++ exceptions based on set exception flags. - /// This function manually throws a corresponding C++ exception if one of the specified flags is set, - /// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// \param excepts OR of exceptions to test - /// \param msg error message to use for exception description - /// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set - /// \throw std::overflow_error if `FE_OVERFLOW` is selected and set - /// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set - /// \throw std::range_error if `FE_INEXACT` is selected and set - inline void fethrowexcept(int excepts, const char *msg = "") - { - excepts &= detail::errflags(); - if(excepts & (FE_INVALID|FE_DIVBYZERO)) - throw std::domain_error(msg); - if(excepts & FE_OVERFLOW) - throw std::overflow_error(msg); - if(excepts & FE_UNDERFLOW) - throw std::underflow_error(msg); - if(excepts & FE_INEXACT) - throw std::range_error(msg); - } - /// \} -} - - -#undef HALF_UNUSED_NOERR -#undef HALF_CONSTEXPR -#undef HALF_CONSTEXPR_CONST -#undef HALF_CONSTEXPR_NOERR -#undef HALF_NOEXCEPT -#undef HALF_NOTHROW -#undef HALF_THREAD_LOCAL -#undef HALF_TWOS_COMPLEMENT_INT -#ifdef HALF_POP_WARNINGS - #pragma warning(pop) - #undef HALF_POP_WARNINGS -#endif - -#endif diff --git a/external_libs/runtime/CMakeLists.txt b/external_libs/runtime/CMakeLists.txt index 6de7dcb59..efd7a0902 100644 --- a/external_libs/runtime/CMakeLists.txt +++ b/external_libs/runtime/CMakeLists.txt @@ -10,7 +10,8 @@ project(brt-libs LANGUAGES CXX CUDA) set(REPO_ROOT ${PROJECT_SOURCE_DIR}) message("REPO_ROOT = ${REPO_ROOT}") -set(CUTLASS_ROOT ${REPO_ROOT}/../external/cutlass) +set(BYTEIR_ROOT ${REPO_ROOT}/../..)
+set(CUTLASS_ROOT ${BYTEIR_ROOT}/external/cutlass) message("CUTLASS_ROOT = ${CUTLASS_ROOT}") add_subdirectory(flash_attn) From 9d2d2c3b37eee832332c694b198d8f08dafbf277 Mon Sep 17 00:00:00 2001 From: Zhekun Zhang Date: Tue, 23 Jan 2024 22:27:45 +0000 Subject: [PATCH 6/9] clean up --- .../runtime/flash_attn/lib/kernel_traits.h | 66 +++++++++---------- .../flash_attn/lib/kernel_traits_sm90.h | 24 +++---- .../cuda/providers/default/custom/custom.cc | 8 +-- 3 files changed, 47 insertions(+), 51 deletions(-) diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits.h b/external_libs/runtime/flash_attn/lib/kernel_traits.h index cd1fae003..b10b955e2 100644 --- a/external_libs/runtime/flash_attn/lib/kernel_traits.h +++ b/external_libs/runtime/flash_attn/lib/kernel_traits.h @@ -89,11 +89,11 @@ struct Flash_fwd_kernel_traits : public Base { // This has to be kBlockKSmem, using kHeadDim gives wrong results for // d=128 Layout>, Stride, _1>>{})); - using SmemLayoutQ = decltype( - tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + using SmemLayoutQ = decltype(tile_to_shape( + SmemLayoutAtomQ{}, Shape, Int>{})); - using SmemLayoutKV = decltype( - tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + using SmemLayoutKV = decltype(tile_to_shape( + SmemLayoutAtomQ{}, Shape, Int>{})); // This has to be kBlockN and not 8, otherwise we get wrong results for d=128 using SmemLayoutAtomVtransposedNoSwizzle = @@ -114,8 +114,8 @@ struct Flash_fwd_kernel_traits : public Base { using SmemLayoutAtomO = decltype(composition( Swizzle{}, Layout, Int>, Stride, _1>>{})); - using SmemLayoutO = decltype( - tile_to_shape(SmemLayoutAtomO{}, Shape, Int>{})); + using SmemLayoutO = decltype(tile_to_shape( + SmemLayoutAtomO{}, Shape, Int>{})); using SmemCopyAtomO = Copy_Atom; using SmemCopyAtomOaccum = Copy_Atom; @@ -149,12 +149,12 @@ struct Flash_fwd_kernel_traits : public Base { // slightly faster. 
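Every hunk in this clean-up patch is a whitespace-only re-wrap of the same idiom: a type alias computed from a call expression via decltype, which clang-format now breaks after the call's opening parenthesis. A distilled illustration of the idiom itself, with hypothetical stand-in types rather than CuTe's real API:

    #include <type_traits>

    // Hypothetical stand-ins for illustration only (not cute::tile_to_shape).
    template <class Atom, class Shape>
    struct TiledLayout {};

    // A declaration suffices: decltype only needs the unevaluated call's type.
    template <class Atom, class Shape>
    TiledLayout<Atom, Shape> tile_to_shape(Atom, Shape);

    struct AtomA {};
    struct ShapeB {};

    // The alias names the call's result type; the patch only moves the line
    // break, the computed type is unchanged:
    using SmemLayout = decltype(tile_to_shape(
        AtomA{}, ShapeB{}));

    static_assert(std::is_same_v<SmemLayout, TiledLayout<AtomA, ShapeB>>);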
using Gmem_copy_struct = std::conditional_t< Has_cp_async, SM80_CP_ASYNC_CACHEGLOBAL, DefaultCopy>; - using GmemTiledCopyQKV = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopyQKV = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per read + using GmemTiledCopyO = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); @@ -162,9 +162,9 @@ struct Flash_fwd_kernel_traits : public Base { Shape, Int>, Stride, _1>>; - using GmemTiledCopyP = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopyP = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store using GmemLayoutAtomOaccum = std::conditional_t< kBlockKSmem == 32, @@ -247,10 +247,10 @@ struct Flash_bwd_kernel_traits : public Base { using SmemLayoutQdO = decltype(tile_to_shape( SmemLayoutAtomQdO{}, make_shape(Int{}, Int{}))); - using SmemLayoutAtomKV = decltype( - composition(Swizzle{}, - Layout, Int>, - Stride, _1>>{})); + using SmemLayoutAtomKV = decltype(composition( + Swizzle{}, + Layout, Int>, + Stride, _1>>{})); using SmemLayoutKV = decltype(tile_to_shape( // SmemLayoutAtomQdO{}, SmemLayoutAtomKV{}, make_shape(Int{}, Int{}))); @@ -375,15 +375,15 @@ struct Flash_bwd_kernel_traits : public Base { using GmemTiledCopyQKV = decltype(make_tiled_copy( Copy_Atom{}, GmemLayoutAtom{}, Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopydO = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydKV = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store - using GmemTiledCopydQ = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydO = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydKV = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopydQ = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store using GmemLayoutAtomdQaccum = std::conditional_t< kBlockKSmem == 32, Layout, // Thread layout, 8 threads per row @@ -394,11 +394,11 @@ struct Flash_bwd_kernel_traits : public Base { Copy_Atom{}, GmemLayoutAtomdQaccum{}, Layout>{})); // Val layout, 4 vals per store - using GmemTiledCopydQaccumAtomicAdd = decltype( - make_tiled_copy(Copy_Atom{}, - Layout, // Thread layout, 8 threads per row - Stride<_32, _1>>{}, - Layout>{})); // Val layout, 1 val per store + using GmemTiledCopydQaccumAtomicAdd = decltype(make_tiled_copy( + Copy_Atom{}, + Layout, // Thread layout, 8 threads per row + Stride<_32, _1>>{}, + Layout>{})); // Val layout, 1 val per store }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h 
b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h index 1c2fc9bab..ca8cdfebd 100644 --- a/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h +++ b/external_libs/runtime/flash_attn/lib/kernel_traits_sm90.h @@ -88,11 +88,11 @@ struct Flash_fwd_kernel_traits : public Base { // This has to be kBlockKSmem, using kHeadDim gives wrong results for // d=128 Layout>, Stride, _1>>{})); - using SmemLayoutQ = decltype( - tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + using SmemLayoutQ = decltype(tile_to_shape( + SmemLayoutAtomQ{}, Shape, Int>{})); - using SmemLayoutKV = decltype( - tile_to_shape(SmemLayoutAtomQ{}, Shape, Int>{})); + using SmemLayoutKV = decltype(tile_to_shape( + SmemLayoutAtomQ{}, Shape, Int>{})); using SmemLayoutAtomVtransposed = decltype(composition(Swizzle{}, @@ -110,8 +110,8 @@ struct Flash_fwd_kernel_traits : public Base { using SmemLayoutAtomO = decltype(composition( Swizzle{}, Layout, Int>, Stride, _1>>{})); - using SmemLayoutO = decltype( - tile_to_shape(SmemLayoutAtomO{}, Shape, Int>{})); + using SmemLayoutO = decltype(tile_to_shape( + SmemLayoutAtomO{}, Shape, Int>{})); using SmemCopyAtomO = Copy_Atom; static constexpr int kSmemQCount = size(SmemLayoutQ{}); @@ -147,9 +147,9 @@ struct Flash_fwd_kernel_traits : public Base { using GmemTiledCopyQKV = decltype(make_tiled_copy( Copy_Atom{}, GmemLayoutAtom{}, Layout>{})); // Val layout, 8 vals per read - using GmemTiledCopyO = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopyO = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtom{}, + Layout>{})); // Val layout, 8 vals per store static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad; static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP"); @@ -157,9 +157,9 @@ struct Flash_fwd_kernel_traits : public Base { Shape, Int>, Stride, _1>>; - using GmemTiledCopyP = decltype( - make_tiled_copy(Copy_Atom{}, GmemLayoutAtomP{}, - Layout>{})); // Val layout, 8 vals per store + using GmemTiledCopyP = decltype(make_tiled_copy( + Copy_Atom{}, GmemLayoutAtomP{}, + Layout>{})); // Val layout, 8 vals per store }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/runtime/lib/backends/cuda/providers/default/custom/custom.cc b/runtime/lib/backends/cuda/providers/default/custom/custom.cc index 01a49755f..36eaac4e6 100644 --- a/runtime/lib/backends/cuda/providers/default/custom/custom.cc +++ b/runtime/lib/backends/cuda/providers/default/custom/custom.cc @@ -1,4 +1,4 @@ -//===- copy.cc ------------------------------------------------*--- C++ -*-===// +//===- custom.cc ----------------------------------------------*--- C++ -*-===// // // Copyright 2022 ByteDance Ltd. and/or its affiliates. All rights reserved. 
// Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,10 +20,8 @@ #include "brt/core/framework/op_accessor.h" #include "brt/core/ir/util.h" #include "byteir/Dialect/Byre/ByreDialect.h" -#include "mlir/IR/BuiltinOps.h" // ModuleOp #include #include -#include #include using namespace brt; @@ -41,8 +39,6 @@ CustomOpKernel::CustomOpKernel(const OpKernelInfo &info) : OpKernel(info) { std::string lib_path = accessor.GetAttrAsString("lib_path"); std::string api_name = accessor.GetAttrAsString("api_name"); custom_lib_hdl = dlopen(lib_path.c_str(), RTLD_LAZY | RTLD_GLOBAL); - // std::cout << "Current path is " << std::filesystem::current_path() << '\n'; - // std::cout << "API name is " << api_name << '\n'; std::string msg = std::string("Custom lib ") + lib_path + " load failed"; BRT_ENFORCE(custom_lib_hdl != nullptr, msg); run_func_ = reinterpret_cast( @@ -77,7 +73,7 @@ common::Status CustomOpKernel::RunImpl(const ExecutionContext &ctx) { static_cast(ctx.work_queue)->GetComputeStream(); run_func_(tensor_args, extra_args, stream); - // need to free extra_args since there is a mallocnbg= + // need to free extra_args since there is a malloc free(extra_args); delete[] tensor_args; return common::Status::OK(); From 57665519c64c0a5bbfaea68dd41c6e356f9f374b Mon Sep 17 00:00:00 2001 From: Zhekun Zhang Date: Tue, 23 Jan 2024 23:07:25 +0000 Subject: [PATCH 7/9] fix for llvm update --- .../Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp index 016067506..d12239c20 100644 --- a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp @@ -177,9 +177,9 @@ struct ByreCustomOpBufferization return false; } - AliasingOpResultList - getAliasingOpResults(Operation * /*op*/, OpOperand & /*opOperand*/, - const AnalysisState & /*state*/) const { + AliasingValueList getAliasingValues(Operation * /*op*/, + OpOperand & /*opOperand*/, + const AnalysisState & /*state*/) const { return {}; } From e3494e1c499283bf7830b82516d290d88571c0b5 Mon Sep 17 00:00:00 2001 From: Zhekun Zhang Date: Tue, 23 Jan 2024 23:46:14 +0000 Subject: [PATCH 8/9] fix for llvm update --- .../Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp index d12239c20..c483c6507 100644 --- a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp @@ -201,9 +201,8 @@ struct ByreCustomOpBufferization if (!tensorType) return failure(); - bool dealloc = shouldDeallocateOpResult(opResult, options); - auto tensorAlloc = allocateTensorForShapedValue( - rewriter, op->getLoc(), opResult, /*escapse*/ !dealloc, options); + auto tensorAlloc = allocateTensorForShapedValue(rewriter, op->getLoc(), + opResult, options); if (failed(tensorAlloc)) return failure(); From 2691869cd316694aee884b49f92f67361b820078 Mon Sep 17 00:00:00 2001 From: Zhekun Zhang Date: Wed, 24 Jan 2024 01:17:27 +0000 Subject: [PATCH 9/9] fix for llvm update --- .../lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp index c483c6507..47201dd49 100644 --- a/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/compiler/lib/Dialect/Byre/Transforms/BufferizableOpInterfaceImpl.cpp @@ -163,7 +163,7 @@ struct ByreComputeOpBufferization struct ByreCustomOpBufferization : public BufferizableOpInterface::ExternalModel { - bool bufferizesToAllocation(Operation * /*op*/, OpResult /*opResult*/) const { + bool bufferizesToAllocation(Operation * /*op*/, Value /*value*/) const { return true; }
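Taken together, patches 7 through 9 track three interface changes from the LLVM bump: getAliasingOpResults becomes getAliasingValues, allocateTensorForShapedValue drops its escape flag (so shouldDeallocateOpResult is gone), and bufferizesToAllocation takes a Value instead of an OpResult. A condensed sketch of the post-update external model, using a placeholder MyCustomOp rather than the Byre op (signatures mirror the hunks above; includes assumed from MLIR's bufferization dialect):

    #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"

    using namespace mlir;
    using namespace mlir::bufferization;

    struct MyCustomOpBufferization
        : public BufferizableOpInterface::ExternalModel<MyCustomOpBufferization,
                                                        MyCustomOp /*placeholder*/> {
      // PATCH 9/9: the query now takes a Value instead of an OpResult.
      bool bufferizesToAllocation(Operation * /*op*/, Value /*value*/) const {
        return true;
      }

      // PATCH 7/9: getAliasingOpResults was renamed to getAliasingValues and
      // returns an AliasingValueList.
      AliasingValueList getAliasingValues(Operation * /*op*/,
                                          OpOperand & /*opOperand*/,
                                          const AnalysisState & /*state*/) const {
        return {};
      }
    };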