Skip to content

Commit

Permalink
v8 - Converted fluid sim into SIMD, Added incremental versioning in
Browse files Browse the repository at this point in the history
makefile
  • Loading branch information
david.wustenhagen committed Mar 24, 2024
1 parent d04f2e6 commit e62886a
Show file tree
Hide file tree
Showing 19 changed files with 199 additions and 17 deletions.
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@

APP_NAME = Sandbox
APP_VER_MAJOR = 0
APP_VER_MINOR = 8
APP_VER_BUILD = 10
APP_VER_MINOR = 9
APP_VER_BUILD = 57


DEFINES = -DAPP_NAME=\"$(APP_NAME)\" -DAPP_VER_MAJOR=$(APP_VER_MAJOR) -DAPP_VER_MINOR=$(APP_VER_MINOR) -DAPP_VER_BUILD=$(APP_VER_BUILD)

CFLAGS = -g3 -Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable
CFLAGS = -O3 -Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -pg -no-pie

EMSFLAGS = -sUSE_SDL=2 -sUSE_SDL_IMAGE=2 -sUSE_SDL_TTF=2 -pthread

Expand All @@ -25,13 +25,13 @@ emscripten: build\application_em.o build\window_em.o build\simulation_em.o
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -sASSERTIONS -sSTACK_SIZE=1048576 --emrun -Wextra build\window_em.o build\simulation_em.o build\application_em.o -o application.js $(EMSFLAGS) --preload-file Resources -sEXPORTED_RUNTIME_METHODS=cwrap -sTOTAL_MEMORY=536870912

build\application_em.o: src\application.c
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -c src\application.c -o build\application_em.o $(EMSFLAGS)
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -c src\application.c -o build\application_em.o $(EMSFLAGS) $(DEFINES)

build\window_em.o: src\window.c
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -c src\window.c -o build\window_em.o $(EMSFLAGS) -mavx -msimd128

build\simulation_em.o: src\simulation.c
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -c src\simulation.c -o build\simulation_em.o $(EMSFLAGS)
C:\Users\dwtys\emsdk\upstream\emscripten\emcc $(CFLAGS) -c src\simulation.c -o build\simulation_em.o $(EMSFLAGS) -mavx -msimd128

run: application.exe
build\$(APP_NAME).exe
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

## Todo

- [ ] Bug: Lava bleeds over to the other side of the map if you add it over the map edge.
- [x] Add a version number.
- [ ] Make transparent things sample the background framebuffer.
- [x] Make transparent things sample the background framebuffer.
- [ ] Make map buffers allocated.
- [ ] Make map resizable.
- [ ] Make starting camera position adjust to center over map regardless of size.
Expand Down
2 changes: 1 addition & 1 deletion application.js

Large diffs are not rendered by default.

Binary file modified application.wasm
Binary file not shown.
Binary file modified build/Sandbox.exe
Binary file not shown.
Binary file modified build/application.o
Binary file not shown.
Binary file added build/application_em.o
Binary file not shown.
Binary file modified build/simulation.o
Binary file not shown.
Binary file added build/simulation_em.o
Binary file not shown.
Binary file modified build/window.o
Binary file not shown.
Binary file added build/window_em.o
Binary file not shown.
Binary file added gmon.out
Binary file not shown.
20 changes: 16 additions & 4 deletions src/application.c
Original file line number Diff line number Diff line change
Expand Up @@ -1301,7 +1301,10 @@ static void generateColorMap()
memcpy(map.argbBuffer, map.argb, sizeof(map.argb));
// memcpy(map.argbBlured, map.argbBuffer, sizeof(map.argb));

gaussBlurargb(map.argbBuffer, map.argbBlured, map.w*map.h, map.w, map.h, 10);
if(window.time.tick.ms100){
gaussBlurargb(map.argbBuffer, map.argbBlured, map.w*map.h, map.w, map.h, 10);

}



Expand Down Expand Up @@ -1500,9 +1503,12 @@ argb_t getTileColorMist(int x, int y, int ys, vec2f_t upVec){
break;
}
}else{
argb.r = (102+(int)ys)>>2;//67
argb.g = (192+(int)ys)>>2;//157
argb.b = (229+(int)ys)>>2;//197

//If the mist is up against the wall, sample the background picture to make it appear transparent
//I don't know why the coordinates are like this, I just tried stuff until it worked....
argb.r = background[rendererSizeY - ys].r; //(102+(int)ys)>>2;//67
argb.g = background[rendererSizeY - ys].g; //(192+(int)ys)>>2;//157
argb.b = background[rendererSizeY - ys].b; //(229+(int)ys)>>2;//197

break;

Expand Down Expand Up @@ -2089,6 +2095,12 @@ static void init()
}
}

// for(int y=0;y<map.h;y++){
// for(int x=0;x<map.w;x++){
// map.mist[x + y * map.w].depth = 10.f;
// }
// }


}

Expand Down
177 changes: 173 additions & 4 deletions src/simulation.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#include <math.h> //sqrtf()
#include <string.h> //memcpy()

#include <immintrin.h> //SIMD stuff
// #include <avxintrin.h>



#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
Expand Down Expand Up @@ -62,6 +67,7 @@ void advect(float* restrict densityMatrix, float* restrict bufferMatrix, vec2f_t
const float dX = (velocityVector[x+y*w].x)*dTime;
const float dY = (velocityVector[x+y*w].y)*dTime;

//If there is no velocity then there is no density moving into this cell, skip this cell
if(dX + dY == 0.f){
bufferMatrix[(x)+(y)*w] = densityMatrix[(x)+(y)*w];
continue;
Expand All @@ -82,6 +88,7 @@ void advect(float* restrict densityMatrix, float* restrict bufferMatrix, vec2f_t
float f3 = densityMatrix[(xdx) +(ydy+1)*w];
float f4 = densityMatrix[(xdx+1)+(ydy+1)*w];

//If no density is available to advect, skip this cell.
if(f1 + f2 + f3 + f4 == 0.f){
bufferMatrix[(x)+(y)*w] = densityMatrix[(x)+(y)*w];
continue;
Expand Down Expand Up @@ -236,6 +243,168 @@ void erodeAndDeposit(float* restrict subject, float* restrict suspendedSubject,


/*
 * simFluid - advance the pipe-based fluid simulation by one time step.
 *
 * fluid    : w*h cells stored row-major (index = x + y*w). Each cell carries
 *            a depth and four outgoing flows: right (x+1), down (y+1),
 *            left (x-1) and up (y-1).
 * terrain  : w*h ground heights, same layout as fluid.
 * g        : acceleration factor of the flow update (presumably gravity).
 * visc     : viscosity; when exactly 0 the viscous damping step is skipped.
 * l        : pipe length; l*l is used both as the pipe cross section and as
 *            the cell area when converting volume to depth.
 * w, h     : grid size in cells.
 * friction : factor the previous flow is multiplied with every step.
 * dTime    : time step.
 *
 * The step has three passes:
 *   1. Update the four outgoing flows of every wet cell (depth > 0.01) from
 *      the surface-height difference to each neighbour; dry cells get all
 *      flows reset to 0.
 *   2. Scale the outgoing flows down where they would drain more than the
 *      cell contains.
 *   3. Apply the net inflow/outflow to the depth of every cell.
 *
 * Map edges are treated as closed walls: a cell never reads a neighbour that
 * lies outside the grid, never pushes fluid over the edge and never receives
 * fluid from outside. The previous version indexed x-1/x+1/y-1/y+1 without
 * any bounds check, which read outside the buffers on the first/last row and
 * wrapped around to the neighbouring row on the first/last column.
 */
void simFluid(fluid_t* restrict fluid, float* restrict terrain, const float g, float visc, const float l, const int w, const int h, const float friction, const float dTime)
{
    if (w <= 0 || h <= 0)
    {
        return; // nothing to simulate
    }

    const float A = l * l; // Cross sectional area of pipe
    fluid_t* restrict f = fluid;   // shorter name
    float* restrict t = terrain;   // shorter name
    const float dTimeAgbyl = dTime * A * g / l;

    // Same test as the original branch selection: only an exact 0 disables damping.
    const int applyViscosity = !(visc == 0.f);

    const __m128 frictionVec = _mm_set1_ps(friction);
    const __m128 dTimeAgbylVec = _mm_set1_ps(dTimeAgbyl);
    const __m128 zeroVec = _mm_set1_ps(0.f);

    // Pass 1: update outgoing flows.
    for (int y = 0; y < h; y++)
    {
        const int yw = y * w;
        for (int x = 0; x < w; x++)
        {
            const int i = x + yw;

            if (!(f[i].depth > 0.01f))
            {
                // Too little fluid to move: no outflow in any direction.
                f[i].right = 0;
                f[i].down = 0;
                f[i].left = 0;
                f[i].up = 0;
                continue;
            }

            const int hasRight = (x + 1 < w);
            const int hasDown = (y + 1 < h);
            const int hasLeft = (x > 0);
            const int hasUp = (y > 0);

            // A missing neighbour is replaced by the cell itself so the loads
            // stay in bounds; the flow in that direction is zeroed below.
            const int iRight = hasRight ? i + 1 : i;
            const int iDown = hasDown ? i + w : i;
            const int iLeft = hasLeft ? i - 1 : i;
            const int iUp = hasUp ? i - w : i;

            // Lane order (high to low): right, down, left, up.
            __m128 dirVec = _mm_set_ps(f[i].right, f[i].down, f[i].left, f[i].up);
            const __m128 depth0Vec = _mm_set1_ps(f[i].depth);
            const __m128 t0Vec = _mm_set1_ps(t[i]);
            const __m128 depth1Vec = _mm_set_ps(f[iRight].depth, f[iDown].depth, f[iLeft].depth, f[iUp].depth);
            const __m128 t1Vec = _mm_set_ps(t[iRight], t[iDown], t[iLeft], t[iUp]);

            // Surface height of this cell minus surface height of each neighbour.
            __m128 headVec = _mm_add_ps(depth0Vec, t0Vec);
            headVec = _mm_sub_ps(headVec, depth1Vec);
            headVec = _mm_sub_ps(headVec, t1Vec);

            // flow = max(flow * friction + headDifference * dTime*A*g/l, 0)
            headVec = _mm_mul_ps(headVec, dTimeAgbylVec);
            dirVec = _mm_mul_ps(dirVec, frictionVec);
            dirVec = _mm_add_ps(dirVec, headVec);
            dirVec = _mm_max_ps(dirVec, zeroVec);

            _MM_EXTRACT_FLOAT(f[i].right, dirVec, 3);
            _MM_EXTRACT_FLOAT(f[i].down, dirVec, 2);
            _MM_EXTRACT_FLOAT(f[i].left, dirVec, 1);
            _MM_EXTRACT_FLOAT(f[i].up, dirVec, 0);

            if (applyViscosity)
            {
                // Damp the flow more strongly the shallower the fluid is.
                const float d = f[i].depth;
                const float V = (d * d) / ((d * d) + 3.f * visc * dTime);
                f[i].right *= V;
                f[i].down *= V;
                f[i].left *= V;
                f[i].up *= V;
            }

            // Closed walls: nothing flows out of the map.
            if (!hasRight)
            {
                f[i].right = 0.f;
            }
            if (!hasDown)
            {
                f[i].down = 0.f;
            }
            if (!hasLeft)
            {
                f[i].left = 0.f;
            }
            if (!hasUp)
            {
                f[i].up = 0.f;
            }
        }
    }

    // Pass 2: make sure flow out of a cell isn't greater than the fluid it holds.
    // NOTE(review): the guard compares depth with the raw flow sum, while K
    // compares depth*l*l with flowSum*dTime - confirm the guard is intended.
    const int cellCount = w * h;
    for (int i = 0; i < cellCount; i++)
    {
        const float outflow = f[i].right + f[i].down + f[i].left + f[i].up;
        if (f[i].depth - outflow < 0)
        {
            const float K = minf(f[i].depth * l * l / (outflow * dTime), 1.0f);
            f[i].right *= K;
            f[i].down *= K;
            f[i].left *= K;
            f[i].up *= K;
        }
    }

    // Pass 3: update depth from net flow. Only flows are read here and only
    // depth is written, so updating in place is safe.
    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const int i = x + y * w;

            // Inflow is the neighbour's flow pointing at this cell; outside the map it is 0.
            const float fromLeft = (x > 0) ? f[i - 1].right : 0.f;
            const float fromBelow = (y + 1 < h) ? f[i + w].up : 0.f;
            const float fromRight = (x + 1 < w) ? f[i + 1].left : 0.f;
            const float fromAbove = (y > 0) ? f[i - w].down : 0.f;

            const float deltaV = (fromLeft + fromBelow + fromRight + fromAbove - (f[i].right + f[i].down + f[i].left + f[i].up)) * dTime;

            f[i].depth = maxf(f[i].depth + deltaV / (l * l), 0.f);
        }
    }
}


void simFluidBackup(fluid_t* restrict fluid, float* restrict terrain, const float g, float visc, const float l, const int w, const int h, const float friction, const float dTime)
{

const float A = l*l; //Cross sectional area of pipe
Expand Down Expand Up @@ -263,10 +432,10 @@ void simFluid(fluid_t* restrict fluid, float* restrict terrain, const float g, f
// f[x + yw].up *= V;

}else{
f[x + y * w].right = 0;
f[x + y * w].down = 0;
f[x + y * w].left = 0;
f[x + y * w].up = 0;
f[x + yw].right = 0;
f[x + yw].down = 0;
f[x + yw].left = 0;
f[x + yw].up = 0;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/window.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
#include <stdlib.h>
#include <stdbool.h>
#include "../dependencies/include/SDL2/SDL_ttf.h"
#include "../dependencies/include/SDL2/SDL_syswm.h"
// #include "../dependencies/include/SDL2/SDL_syswm.h"
#include <math.h> //clampf
#include <immintrin.h> //SIMD stuff
#include <avxintrin.h>
// #include <avxintrin.h>

#include "window.h"

Expand Down
Binary file added test.txt
Binary file not shown.
Binary file added test2.txt
Binary file not shown.
Binary file added versions/Sandboxv7.zip
Binary file not shown.
Binary file added versions/Sandboxv8.zip
Binary file not shown.

0 comments on commit e62886a

Please sign in to comment.