Skip to content

Commit

Permalink
Merge pull request #9 from stevenewald/simd
Browse files Browse the repository at this point in the history
Add AVX512 equations
  • Loading branch information
stevenewald authored Nov 26, 2024
2 parents b527e68 + 765cd9d commit 7cf35e8
Show file tree
Hide file tree
Showing 12 changed files with 198 additions and 49 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ include(cmake/project-is-top-level.cmake)
include(cmake/variables.cmake)

set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16")
add_compile_options(-fno-inline -fno-omit-frame-pointer)


# ---- Declare library ----

Expand All @@ -24,6 +27,8 @@ add_library(
source/mandelbrot/mandelbrot_window.cpp
source/graphics/color_conversions/color_conversions.cpp
source/graphics/aspect_ratio/aspect_ratio.cpp
source/mandelbrot/equations_simd.cpp
source/mandelbrot/equations.cpp
)

target_include_directories(
Expand Down
8 changes: 4 additions & 4 deletions source/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

namespace fractal {

constexpr std::size_t WINDOW_WIDTH = 800UZ*2;
constexpr std::size_t WINDOW_HEIGHT = 600UZ*2;
constexpr std::size_t WINDOW_WIDTH = 5120;
constexpr std::size_t WINDOW_HEIGHT = 1440;
constexpr std::size_t FRAME_RATE = 60UZ;

constexpr display_domain DISPLAY_DOMAIN{
Expand All @@ -17,8 +17,8 @@ constexpr display_domain DISPLAY_DOMAIN{
};

constexpr complex_domain START_COMPLEX_DOMAIN{
{complex_underlying{-2}, complex_underlying{-1.5}},
{complex_underlying{1}, complex_underlying{1.5} }
{complex_underlying{-1.402}, complex_underlying{-.001}},
{complex_underlying{-1.400}, complex_underlying{.001} }
};

const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4;
Expand Down
6 changes: 6 additions & 0 deletions source/coordinates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ struct display_domain {
return *this;
}

DisplayCoordinateIterator& operator+=(difference_type other)
{
current_coordinate_ += other;
return *this;
}

DisplayCoordinateIterator operator-(difference_type other)
{
DisplayCoordinateIterator tmp = *this;
Expand Down
3 changes: 2 additions & 1 deletion source/graphics/aspect_ratio/aspect_ratio.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#pragma once

#include "config.hpp"
#include "coordinates.hpp"

namespace fractal {
display_coordinate calculate_rectangle_end_point(
display_coordinate start, display_coordinate current,
float target_aspect_ratio = 800.0f / 600.0f
float target_aspect_ratio = static_cast<float>(WINDOW_WIDTH) / WINDOW_HEIGHT
);
} // namespace fractal
18 changes: 6 additions & 12 deletions source/graphics/color_conversions/color_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,28 @@ color hsv_to_rgb(float hue, float saturation, float value)
float green_temp = 0.0f;
float blue_temp = 0.0f;

if (0 <= hue_prime && hue_prime < 1) {
if (hue_prime < 1) {
red_temp = chroma;
green_temp = uint16_termediate;
blue_temp = 0;
}
else if (1 <= hue_prime && hue_prime < 2) {
else if (hue_prime < 2) {
red_temp = uint16_termediate;
green_temp = chroma;
blue_temp = 0;
}
else if (2 <= hue_prime && hue_prime < 3) {
red_temp = 0;
else if (hue_prime < 3) {
green_temp = chroma;
blue_temp = uint16_termediate;
}
else if (3 <= hue_prime && hue_prime < 4) {
red_temp = 0;
else if (hue_prime < 4) {
green_temp = uint16_termediate;
blue_temp = chroma;
}
else if (4 <= hue_prime && hue_prime < 5) {
else if (hue_prime < 5) {
red_temp = uint16_termediate;
green_temp = 0;
blue_temp = chroma;
}
else if (5 <= hue_prime && hue_prime < 6) {
else if (hue_prime < 6) {
red_temp = chroma;
green_temp = 0;
blue_temp = uint16_termediate;
}

Expand Down
28 changes: 28 additions & 0 deletions source/mandelbrot/equations.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "equations.hpp"

#include "config.hpp"

namespace fractal {
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
{
return z_n * z_n + constant;
}

iteration_count compute_iterations(
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
iteration_count max_iters
)
{
iteration_count iterations = 0;
std::complex<complex_underlying> z_n = z_0;

while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
z_n = step(z_n, constant);
++iterations;
}

return iterations;
}
} // namespace fractal
23 changes: 4 additions & 19 deletions source/mandelbrot/equations.hpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
#pragma once

#include "config.hpp"
#include "units.hpp"

namespace fractal {
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
inline std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
{
return z_n * z_n + constant;
}
std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant);

inline iteration_count compute_iterations(
iteration_count compute_iterations(
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
iteration_count max_iters
)
{
iteration_count iterations = 0;
std::complex<complex_underlying> z_n = z_0;

while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
z_n = step(z_n, constant);
++iterations;
}

return iterations;
}
);
} // namespace fractal
67 changes: 67 additions & 0 deletions source/mandelbrot/equations_simd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "config.hpp"
#include "equations.hpp"
#include "units.hpp"

#include <immintrin.h>

namespace fractal {
std::array<iteration_count, 8> compute_iterations(
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
)
{
static const auto SQUARED_DIVERGENCE =
MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM;

alignas(64) std::array<double, 8> reals = z_0.real;
alignas(64) std::array<double, 8> imags = z_0.imaginary;
alignas(64) std::array<double, 8> const_reals = constant.real;
alignas(64) std::array<double, 8> const_imags = constant.imaginary;

std::array<iteration_count, 8> solved_its = {0};

__m512d input_vec_real = _mm512_load_pd(reals.data());
__m512d input_vec_imag = _mm512_load_pd(imags.data());
__m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data());
__m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data());
__m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data());

for (iteration_count iterations = 0; iterations < max_iters; iterations++) {
// Square real
__m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real);

// Square imag
__m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag);

// Create imags
__m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2));
input_vec_imag =
_mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags);

// Create reals
__m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag);
input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals);

// Create squared norms
__m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag);
__mmask8 solved_mask = _mm512_cmp_pd_mask(
squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS
);

uint32_t solved = _cvtmask8_u32(solved_mask);
solved_its_vec = _mm512_mask_blend_epi16(
solved_mask, solved_its_vec,
_mm512_set1_epi16(static_cast<int16_t>(iterations))
);
if (solved == 0xFF) [[unlikely]]
break;
}

__mmask32 mask = _mm512_cmpeq_epi16_mask(solved_its_vec, _mm512_set1_epi16(0));
solved_its_vec = _mm512_mask_mov_epi16(
solved_its_vec, mask, _mm512_set1_epi16(static_cast<int16_t>(max_iters))
);
_mm512_storeu_epi16(solved_its.data(), solved_its_vec);

return solved_its;
}
} // namespace fractal
11 changes: 11 additions & 0 deletions source/mandelbrot/equations_simd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include "config.hpp"
#include "units.hpp"

namespace fractal {

std::array<iteration_count, 8> compute_iterations(
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
);
} // namespace fractal
63 changes: 52 additions & 11 deletions source/mandelbrot/mandelbrot_window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,54 @@

#include "config.hpp"
#include "coordinates.hpp"
#include "equations.hpp"
#include "equations_simd.hpp"
#include "graphics/aspect_ratio/aspect_ratio.hpp"
#include "graphics/color_conversions/color_conversions.hpp"
#include "graphics/display_to_complex.hpp"
#include "units.hpp"

#include <fmt/base.h>
#include <fmt/format.h>
#include <immintrin.h>
#include <SFML/Graphics/Drawable.hpp>
#include <SFML/Graphics/Image.hpp>
#include <SFML/Graphics/Sprite.hpp>
#include <SFML/Graphics/Texture.hpp>

#include <chrono>

#include <future>
#include <memory>
#include <optional>

namespace fractal {
void MandelbrotWindow::draw_coordinate_(
const display_coordinate& display_coord, const complex_coordinate& complex_coord
display_coordinate display_coord, const avx512_complex& complex_coords
)
{
iteration_count iterations =
compute_iterations({0, 0}, complex_coord, MANDELBROT_MAX_ITERATIONS);
static constexpr avx512_complex START{};
alignas(64) std::array<iteration_count, 8> iterations =
compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS);
alignas(64) std::array<float, 8> ratios{};

__m128i iterations_vec = _mm_loadu_epi16(iterations.data());

__m128i input_low = _mm_unpacklo_epi16(iterations_vec, _mm_setzero_si128());
__m128i input_high = _mm_unpackhi_epi16(iterations_vec, _mm_setzero_si128());

__m128 floats_low = _mm_cvtepi32_ps(input_low);
__m128 floats_high = _mm_cvtepi32_ps(input_high);

float iteration_ratio = static_cast<float>(iterations) / MANDELBROT_MAX_ITERATIONS;
set_pixel_color(display_coord, iteration_ratio);
floats_low = _mm_div_ps(floats_low, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));
floats_high = _mm_div_ps(floats_high, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));

_mm_storeu_ps(ratios.begin(), floats_low);
_mm_storeu_ps(ratios.begin() + 4, floats_high);

for (size_t i = 0; i < 8; i++) {
set_pixel_color(display_coord, ratios[i]);
display_coord.first++;
}
}

void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
Expand All @@ -38,22 +62,36 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
domain_ = {new_top, new_bottom};
to_complex = {DISPLAY_DOMAIN.end_coordinate, domain_};

auto process_coordinate = [&](const display_coordinate& display_coord) {
auto complex_coord = to_complex.to_complex_projection(display_coord);
draw_coordinate_(display_coord, complex_coord);
auto process_coordinates = [&](display_coordinate start_display_coord) {
std::array<std::complex<complex_underlying>, 8> coords{};
auto t = start_display_coord;
for (size_t i = 0; i < 8; i++) {
coords[i] = to_complex.to_complex_projection(start_display_coord);
start_display_coord.first++;
}
avx512_complex coords2{};
for (size_t i = 0; i < 8; i++) {
coords2.real[i] = coords[i].real();
coords2.imaginary[i] = coords[i].imag();
}
draw_coordinate_(t, coords2);
};

auto process_chunk = [&](display_domain::DisplayCoordinateIterator start,
display_domain::DisplayCoordinateIterator end) {
std::for_each(start, end, process_coordinate);
for (auto it = start; it != end; it += 8) {
process_coordinates(*it);
}
};

uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT;
uint32_t chunks = 32;
uint32_t chunks = 128;
uint32_t step = total / chunks;

std::vector<std::future<void>> futures;

auto start = std::chrono::high_resolution_clock::now();

for (uint32_t chunk = 0; chunk < chunks; chunk++) {
display_domain::DisplayCoordinateIterator start =
DISPLAY_DOMAIN.begin() + chunk * step;
Expand All @@ -66,6 +104,9 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
for (const auto& future : futures) {
future.wait();
}
auto end = std::chrono::high_resolution_clock::now();
auto time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << fmt::format("Time elapsed: {}", time.count()) << "\n";
}

MandelbrotWindow::MandelbrotWindow()
Expand Down
3 changes: 2 additions & 1 deletion source/mandelbrot/mandelbrot_window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "config.hpp"
#include "coordinates.hpp"
#include "graphics/display_event_observer.hpp"
#include "units.hpp"

#include <SFML/Graphics/Drawable.hpp>
#include <SFML/Graphics/Image.hpp>
Expand All @@ -22,7 +23,7 @@ class MandelbrotWindow : public DisplayEventObserver {

void on_resize_(display_domain new_domain_selection);
void draw_coordinate_(
const display_coordinate& display_coord, const complex_coordinate& complex_coord
display_coordinate display_coord, const avx512_complex& complex_coords
);

public:
Expand Down
12 changes: 11 additions & 1 deletion source/units.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@ using iteration_count = std::uint16_t;
using complex_underlying = boost::multiprecision::number<small_float>;
using complex = boost::multiprecision::complex_adaptor<small_float>;*/

using complex_underlying = __float128;
using complex_underlying = double;

struct avx512_complex {
std::array<complex_underlying, 8> real;
std::array<complex_underlying, 8> imaginary;

std::complex<complex_underlying> get_complex(uint8_t index)
{
return {real[index], imaginary[index]};
}
};

struct color {
uint8_t red;
Expand Down

0 comments on commit 7cf35e8

Please sign in to comment.