Skip to content

Commit

Permalink
Support for Apple M1
Browse files: browse the repository at this point in the history
  • Loading branch information
mhochsteger committed Dec 15, 2020
1 parent 1c0e153 commit 2fd5b19
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 14 deletions.
11 changes: 5 additions & 6 deletions basiclinalg/ngblas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3792,7 +3792,7 @@ namespace ngbla
return ssum;
}

#elif defined __SSE__
#elif defined NETGEN_ARCH_AMD64

double MatKernelMaskedScalAB (size_t n,
double * pa, size_t da,
Expand Down Expand Up @@ -3853,21 +3853,20 @@ namespace ngbla
double * pb, size_t db,
const BitArray & ba)
{
double sum = 0;
double vhsum[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
int i(0);
for ( ; i+8 < fa.Size(); i += 8)
for ( ; i+8 <= n; i += 8)
{
for (int j = 0; j < 8; j++)
{
double hprod = fa(i+j)*fb(i+j);
double hprod = pa[i+j]*pb[i+j];
if (ba.Test(i+j))
vhsum[j] += hprod;
}
}
for ( ; i < fa.Size(); i++)
for ( ; i < n; i++)
if (ba.Test(i))
sum += fa(i)*fb(i);
vhsum[0] += pa[i]*pb[i];
for (int j = 1; j < 8; j++)
vhsum[0] += vhsum[j];
return vhsum[0];
Expand Down
2 changes: 1 addition & 1 deletion basiclinalg/ngblas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

// optimized matrix kernels

#ifdef __clang__
#if defined(__clang__ ) && defined(NETGEN_ARCH_AMD64)
#define REGCALL __regcall
#else
#define REGCALL
Expand Down
6 changes: 3 additions & 3 deletions comp/meshaccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2497,13 +2497,13 @@ namespace ngcomp
atomic<size_t> ProgressOutput :: cnt;
thread_local size_t ProgressOutput :: thd_cnt = 0;
// thread_local double ProgressOutput :: thd_prev_time = WallTime();
thread_local size_t ProgressOutput :: thd_prev_time = __rdtsc();
size_t tsc_wait = 0.05*2.7e9; // rough
thread_local size_t ProgressOutput :: thd_prev_time = GetTimeCounter();
size_t tsc_wait = 0.05*(1.0/seconds_per_tick);
void ProgressOutput :: Update ()
{
thd_cnt++;
// double time = WallTime();
size_t time = __rdtsc();
size_t time = GetTimeCounter();
// if (time > thd_prev_time+0.05)
if (time > thd_prev_time+tsc_wait)
{
Expand Down
6 changes: 6 additions & 0 deletions fem/intrule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3277,8 +3277,14 @@ namespace ngfem
nip = ir.GetNIP();
this->size = (ir.Size()+SIMD<IntegrationPoint>::Size()-1) / SIMD<IntegrationPoint>::Size();

#ifdef NETGEN_ARCH_AMD64
this -> mem_to_delete = (SIMD<IntegrationPoint>*)
_mm_malloc(this->size*sizeof(SIMD<IntegrationPoint>), SIMD<double>::Size()*sizeof(double));
#else // NETGEN_ARCH_AMD64
this -> mem_to_delete = (SIMD<IntegrationPoint>*)
malloc(this->size*sizeof(SIMD<IntegrationPoint>));
#endif // NETGEN_ARCH_AMD64

this->data = this->mem_to_delete;

dimension = ir.Dim();
Expand Down
8 changes: 7 additions & 1 deletion fem/intrule.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2283,7 +2283,13 @@ namespace ngfem
SIMD_IntegrationRule (int nip, LocalHeap & lh);
// Destructor: releases the buffer referenced by mem_to_delete (when non-null)
// and then resets the pointer to nullptr.
NGS_DLL_HEADER ~SIMD_IntegrationRule ()
{
// NOTE(review): this view shows removed and added diff lines together without
// +/- markers. Per the hunk summary (7 additions, 1 deletion) the statement
// directly below appears to be the deleted pre-change line, superseded by the
// architecture-specific #ifdef block that follows -- confirm against the file.
if (mem_to_delete) _mm_free(mem_to_delete);
#ifdef NETGEN_ARCH_AMD64
// NETGEN_ARCH_AMD64 builds: release the buffer with _mm_free.
if (mem_to_delete)
_mm_free(mem_to_delete);
#else // NETGEN_ARCH_AMD64
// All other architectures: release the buffer with plain free.
if (mem_to_delete)
free(mem_to_delete);
#endif // NETGEN_ARCH_AMD64
// Clear the pointer after the buffer has been released.
mem_to_delete = nullptr;
}

Expand Down
1 change: 0 additions & 1 deletion include/ngs_stdcpp_include.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@
#endif


#include <immintrin.h>


#ifndef __assume
Expand Down
2 changes: 2 additions & 0 deletions linalg/sparsematrix_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace ngla
void SparseMatrixTM<TM> ::
PrefetchRow (int rownr) const
{
#ifdef NETGEN_ARCH_AMD64
#ifdef __GNUC__
size_t fi = firsti[rownr], fin = firsti[rownr+1];
// int * pi = &colnr[fi], * pin = &colnr[fin];
Expand All @@ -38,6 +39,7 @@ namespace ngla
vi += 64/sizeof(double);
}
#endif
#endif // NETGEN_ARCH_AMD64
;
}

Expand Down
4 changes: 2 additions & 2 deletions ngstd/autodiffdiff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,12 +456,12 @@ inline AutoDiffDiff<D, SCAL> sqrt (const AutoDiffDiff<D, SCAL> & x)
AutoDiffDiff<D, SCAL> res;
res.Value() = sqrt(x.Value());
for (int j = 0; j < D; j++)
res.DValue(j) = IfZero(x.DValue(j),0.,0.5 / res.Value() * x.DValue(j));
res.DValue(j) = IfZero(x.DValue(j),SCAL{0.},0.5 / res.Value() * x.DValue(j));


for (int i = 0; i < D; i++)
for (int j = 0; j < D; j++)
res.DDValue(i,j) = IfZero(x.DDValue(i,j)+x.DValue(i) * x.DValue(j),0.,0.5/res.Value() * x.DDValue(i,j) - 0.25 / (x.Value()*res.Value()) * x.DValue(i) * x.DValue(j));
res.DDValue(i,j) = IfZero(x.DDValue(i,j)+x.DValue(i) * x.DValue(j),SCAL{0.},0.5/res.Value() * x.DDValue(i,j) - 0.25 / (x.Value()*res.Value()) * x.DValue(i) * x.DValue(j));

return res;
}
Expand Down

0 comments on commit 2fd5b19

Please sign in to comment.