From c62a19bd705ad5a99501f41b27de9c4a0a88621b Mon Sep 17 00:00:00 2001 From: stdlib-bot Date: Tue, 24 Sep 2024 03:32:30 +0000 Subject: [PATCH] Auto-generated commit --- .github/.keepalive | 1 - CHANGELOG.md | 16 ++++++- CONTRIBUTORS | 7 +++ README.md | 32 ++++++++++++++ benchmark/c/benchmark.length.c | 46 ++++++++++++++++++- examples/c/example.c | 6 +++ include/stdlib/blas/base/ddot.h | 5 +++ lib/ndarray.native.js | 15 +------ manifest.json | 69 ++++++++++++++++++++--------- package.json | 3 +- src/addon.c | 22 +++++++++- src/ddot.c | 48 ++------------------ src/ddot_cblas.c | 25 +++++++++-- src/ddot_f.c | 24 +++++++++- src/ddot_ndarray.c | 78 +++++++++++++++++++++++++++++++++ 15 files changed, 307 insertions(+), 90 deletions(-) delete mode 100644 .github/.keepalive create mode 100644 src/ddot_ndarray.c diff --git a/.github/.keepalive b/.github/.keepalive deleted file mode 100644 index 63562ba..0000000 --- a/.github/.keepalive +++ /dev/null @@ -1 +0,0 @@ -2024-09-01T02:10:20.904Z diff --git a/CHANGELOG.md b/CHANGELOG.md index a250ba0..8c60bbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,17 @@
-## Unreleased (2024-09-01) +## Unreleased (2024-09-24) + +
+ +### Features + +- [`e4f28df`](https://github.com/stdlib-js/stdlib/commit/e4f28df214f15513bdafeb5a6bbc3bde115dc63b) - add C `ndarray` implementation for `blas/base/ddot` [(#2936)](https://github.com/stdlib-js/stdlib/pull/2936) + +
+ +
@@ -12,6 +22,7 @@
+- [`e4f28df`](https://github.com/stdlib-js/stdlib/commit/e4f28df214f15513bdafeb5a6bbc3bde115dc63b) - **feat:** add C `ndarray` implementation for `blas/base/ddot` [(#2936)](https://github.com/stdlib-js/stdlib/pull/2936) _(by Aman Bhansali)_ - [`2777e4b`](https://github.com/stdlib-js/stdlib/commit/2777e4be161869d09406e3b17947d24c64b47af2) - **bench:** resolve lint errors in benchmarks _(by Athan Reines)_
@@ -24,8 +35,9 @@ ### Contributors -A total of 1 person contributed to this release. Thank you to this contributor: +A total of 2 people contributed to this release. Thank you to the following contributors: +- Aman Bhansali - Athan Reines
diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 57d1184..147a89e 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -2,6 +2,7 @@ # # Contributors listed in alphabetical order. +Aayush Khanna <96649223+aayush0325@users.noreply.github.com> Adarsh Palaskar Aditya Sapra AgPriyanshu18 <113460573+AgPriyanshu18@users.noreply.github.com> @@ -26,17 +27,20 @@ EuniceSim142 <77243938+EuniceSim142@users.noreply.github.com> Frank Kovacs Golden Kumar <103646877+AuenKr@users.noreply.github.com> Gunj Joshi +HarshaNP <96897754+GittyHarsha@users.noreply.github.com> Harshita Kalani Hridyanshu <124202756+HRIDYANSHU054@users.noreply.github.com> Jaimin Godhani <112328542+Jai0401@users.noreply.github.com> James Gelok Jaysukh Makvana +Jenish Thapa <141203631+jenish-thapa@users.noreply.github.com> Jithin KS Joel Mathew Koshy Joey Reed Jordan Gallivan <115050475+Jordan-Gallivan@users.noreply.github.com> Joris Labie Justin Dennison +Kaif Mohd Karthik Prakash <116057817+skoriop@users.noreply.github.com> Khaldon Krishnendu Das <86651039+itskdhere@users.noreply.github.com> @@ -86,8 +90,10 @@ Stephannie Jiménez Gacha Suraj kumar <125961509+kumarsuraj212003@users.noreply.github.com> Tirtadwipa Manunggal Tudor Pagu <104032457+tudor-pagu@users.noreply.github.com> +Tufailahmed Bargir <142114244+Tufailahmed-Bargir@users.noreply.github.com> Utkarsh Utkarsh Raj +Vaibhav Patel <98279986+noobCoderVP@users.noreply.github.com> Varad Gupta Xiaochuan Ye Yernar Yergaziyev @@ -96,3 +102,4 @@ nishant-s7 <97207366+nishant-s7@users.noreply.github.com> orimiles5 <97595296+orimiles5@users.noreply.github.com> rainn <88160429+AmCodesLame@users.noreply.github.com> rei2hu +yaswanth <116426380+yaswanthkosuru@users.noreply.github.com> diff --git a/README.md b/README.md index 51cb5e7..c11d0ae 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,32 @@ The function accepts the following arguments: double c_ddot( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ); ``` +#### 
c_ddot_ndarray( N, \*X, strideX, offsetX, \*Y, strideY, offsetY ) + +Computes the dot product of two double-precision floating-point vectors using alternative indexing semantics. + +```c +const double x[] = { 4.0, 2.0, -3.0, 5.0, -1.0 }; +const double y[] = { 2.0, 6.0, -1.0, -4.0, 8.0 }; + +double v = c_ddot_ndarray( 5, x, -1, 4, y, -1, 4 ); +// returns -5.0 +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **X**: `[in] double*` first input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. +- **Y**: `[in] double*` second input array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. +- **offsetY**: `[in] CBLAS_INT` starting index for `Y`. + +```c +double c_ddot_ndarray( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); +``` +
@@ -299,6 +325,12 @@ int main( void ) { // Print the result: printf( "dot product: %lf\n", d ); + + // Compute the dot product: + d = c_ddot_ndarray( N, x, strideX, 0, y, strideY, N-1 ); + + // Print the result: + printf( "dot product: %lf\n", d ); } ``` diff --git a/benchmark/c/benchmark.length.c b/benchmark/c/benchmark.length.c index 91cbd20..362d748 100644 --- a/benchmark/c/benchmark.length.c +++ b/benchmark/c/benchmark.length.c @@ -94,7 +94,7 @@ static double rand_double( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; double x[ len ]; double y[ len ]; @@ -122,6 +122,41 @@ static double benchmark( int iterations, int len ) { return elapsed; } +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + double x[ len ]; + double y[ len ]; + double z; + double t; + int i; + + for ( i = 0; i < len; i++ ) { + x[ i ] = ( rand_double()*20000.0 ) - 10000.0; + y[ i ] = ( rand_double()*20000.0 ) - 10000.0; + } + z = 0.0; + t = tic(); + for ( i = 0; i < iterations; i++ ) { + z = c_ddot_ndarray( len, x, 1, 0, y, 1, 0 ); + if ( z != z ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( z != z ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + /** * Main execution sequence. 
*/ @@ -144,7 +179,14 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/examples/c/example.c b/examples/c/example.c index c205020..99cc7e8 100644 --- a/examples/c/example.c +++ b/examples/c/example.c @@ -36,4 +36,10 @@ int main( void ) { // Print the result: printf( "dot product: %lf\n", d ); + + // Compute the dot product: + d = c_ddot_ndarray( N, x, strideX, 0, y, strideY, N-1 ); + + // Print the result: + printf( "dot product: %lf\n", d ); } diff --git a/include/stdlib/blas/base/ddot.h b/include/stdlib/blas/base/ddot.h index 2368577..beb870a 100644 --- a/include/stdlib/blas/base/ddot.h +++ b/include/stdlib/blas/base/ddot.h @@ -36,6 +36,11 @@ extern "C" { */ double API_SUFFIX(c_ddot)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ); +/** +* Computes the dot product of two double-precision floating-point vectors using alternative indexing semantics. 
+*/ +double API_SUFFIX(c_ddot_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); + #ifdef __cplusplus } #endif diff --git a/lib/ndarray.native.js b/lib/ndarray.native.js index 64fa34b..d387975 100644 --- a/lib/ndarray.native.js +++ b/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided-base-min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided-base-offset-view' ); -var addon = require( './ddot.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -49,16 +47,7 @@ var addon = require( './ddot.native.js' ); * // returns -5.0 */ function ddot( N, x, strideX, offsetX, y, strideY, offsetY ) { - var viewX; - var viewY; - - offsetX = minViewBufferIndex( N, strideX, offsetX ); - offsetY = minViewBufferIndex( N, strideY, offsetY ); - - viewX = offsetView( x, offsetX ); - viewY = offsetView( y, offsetY ); - - return addon( N, viewX, strideX, viewY, strideY ); + return addon.ndarray( N, x, strideX, offsetX, y, strideY, offsetY ); } diff --git a/manifest.json b/manifest.json index bcd1976..4dc0d8f 100644 --- a/manifest.json +++ b/manifest.json @@ -45,6 +45,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -58,7 +59,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -66,7 +68,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, { @@ -75,7 +78,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -83,7 +87,8 @@ "libraries": [], "libpath": [], "dependencies": [ - 
"@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, @@ -105,6 +110,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -129,7 +135,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" ] }, { @@ -149,7 +156,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" ] }, @@ -170,6 +178,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -183,7 +192,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -191,7 +201,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, { @@ -200,7 +211,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -208,7 +220,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, @@ -229,6 +242,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -252,7 +266,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" ] }, { @@ -271,7 +286,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + 
"@stdlib/strided-base-min-view-buffer-index" ] }, @@ -293,6 +309,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -317,7 +334,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" ] }, { @@ -337,7 +355,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" ] }, @@ -347,7 +366,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -356,6 +376,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset", "@stdlib/napi-export", "@stdlib/napi-argv", "@stdlib/napi-argv-int64", @@ -369,7 +390,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -377,7 +399,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, { @@ -386,7 +409,8 @@ "blas": "", "wasm": false, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -394,7 +418,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] }, @@ -404,7 +429,8 @@ "blas": "", "wasm": true, "src": [ - "./src/ddot.c" + "./src/ddot.c", + "./src/ddot_ndarray.c" ], "include": [ "./include" @@ -412,7 +438,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas-base-shared" + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" ] } ] diff --git a/package.json b/package.json index eec9034..4115570 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 
@@ "@stdlib/napi-argv-strided-float64array": "^0.2.2", "@stdlib/napi-create-double": "^0.0.2", "@stdlib/napi-export": "^0.2.2", + "@stdlib/strided-base-min-view-buffer-index": "^0.2.2", "@stdlib/strided-base-stride2offset": "^0.1.0", "@stdlib/utils-define-nonenumerable-read-only-property": "^0.2.2", "@stdlib/utils-library-manifest": "^0.2.2", @@ -60,8 +61,6 @@ "@stdlib/math-base-special-pow": "^0.3.0", "@stdlib/random-array-discrete-uniform": "^0.2.1", "@stdlib/random-array-uniform": "^0.2.1", - "@stdlib/strided-base-min-view-buffer-index": "^0.2.2", - "@stdlib/strided-base-offset-view": "^0.2.2", "proxyquire": "^2.0.0", "tape": "git+https://github.com/kgryte/tape.git#fix/globby", "istanbul": "^0.4.1", diff --git a/src/addon.c b/src/addon.c index ea5ab4b..500f6b5 100644 --- a/src/addon.c +++ b/src/addon.c @@ -43,4 +43,24 @@ static napi_value addon( napi_env env, napi_callback_info info ) { return v; } -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ) +/** +* Receives JavaScript callback invocation data. 
+* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 7 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 3 ); + STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 5 ); + STDLIB_NAPI_ARGV_INT64( env, offsetY, argv, 6 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, X, N, strideX, argv, 1 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, Y, N, strideY, argv, 4 ); + STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(c_ddot_ndarray)( N, X, strideX, offsetX, Y, strideY, offsetY ), v ); + return v; +} + +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ) diff --git a/src/ddot.c b/src/ddot.c index d139591..272fa25 100644 --- a/src/ddot.c +++ b/src/ddot.c @@ -18,6 +18,7 @@ #include "stdlib/blas/base/ddot.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/stride2offset.h" /** * Computes the dot product of two double-precision floating-point vectors. @@ -30,49 +31,8 @@ * @return the dot product */ double API_SUFFIX(c_ddot)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ) { - double dot; - CBLAS_INT ix; - CBLAS_INT iy; - CBLAS_INT m; - CBLAS_INT i; - - dot = 0.0; - if ( N <= 0 ) { - return dot; - } - // If both strides are equal to `1`, use unrolled loops... - if ( strideX == 1 && strideY == 1 ) { - m = N % 5; - - // If we have a remainder, do a clean-up loop... 
- if ( m > 0 ) { - for ( i = 0; i < m; i++ ) { - dot += X[ i ] * Y[ i ]; - } - } - if ( N < 5 ) { - return dot; - } - for ( i = m; i < N; i += 5 ) { - dot += ( X[i]*Y[i] ) + ( X[i+1]*Y[i+1] ) + ( X[i+2]*Y[i+2] ) + ( X[i+3]*Y[i+3] ) + ( X[i+4]*Y[i+4] ); - } - return dot; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - for ( i = 0; i < N; i++ ) { - dot += X[ ix ] * Y[ iy ]; - ix += strideX; - iy += strideY; - } - return dot; + CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); + CBLAS_INT oy = stdlib_strided_stride2offset( N, strideY ); + return API_SUFFIX(c_ddot_ndarray)( N, X, strideX, ox, Y, strideY, oy ); } diff --git a/src/ddot_cblas.c b/src/ddot_cblas.c index 53b5dce..c776356 100644 --- a/src/ddot_cblas.c +++ b/src/ddot_cblas.c @@ -19,17 +19,36 @@ #include "stdlib/blas/base/ddot.h" #include "stdlib/blas/base/ddot_cblas.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two double-precision floating-point vectors. * * @param N number of indexed elements -* @param X first array +* @param X first input array * @param strideX X stride length -* @param Y second array +* @param Y second input array * @param strideY Y stride length -* @return the dot product +* @return dot product */ double API_SUFFIX(c_ddot)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ) { return API_SUFFIX(cblas_ddot)( N, X, strideX, Y, strideY ); } + +/** +* Computes the dot product of two double-precision floating-point vectors using alternative indexing semantics. 
+* +* @param N number of indexed elements +* @param X first input array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second input array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +double API_SUFFIX(c_ddot_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer + return API_SUFFIX(cblas_ddot)( N, X, strideX, Y, strideY ); +} diff --git a/src/ddot_f.c b/src/ddot_f.c index 9184cc3..dd916e1 100644 --- a/src/ddot_f.c +++ b/src/ddot_f.c @@ -19,19 +19,41 @@ #include "stdlib/blas/base/ddot.h" #include "stdlib/blas/base/ddot_fortran.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two double-precision floating-point vectors. * * @param N number of indexed elements +* @param X first input array +* @param strideX X stride length +* @param Y second input array +* @param strideY Y stride length +* @return dot product +*/ +double API_SUFFIX(c_ddot)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ) { + double dot; + ddotsub( &N, X, &strideX, Y, &strideY, &dot ); + return dot; +} + +/** +* Computes the dot product of two double-precision floating-point vectors using alternative indexing semantics. 
+* +* @param N number of indexed elements * @param X first array * @param strideX X stride length +* @param offsetX starting index for X * @param Y second array * @param strideY Y stride length +* @param offsetY starting index for Y * @return the dot product */ -double API_SUFFIX(c_ddot)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const double *Y, const CBLAS_INT strideY ) { +double API_SUFFIX(c_ddot_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { double dot; + + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer ddotsub( &N, X, &strideX, Y, &strideY, &dot ); return dot; } diff --git a/src/ddot_ndarray.c b/src/ddot_ndarray.c new file mode 100644 index 0000000..aecf91d --- /dev/null +++ b/src/ddot_ndarray.c @@ -0,0 +1,78 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2019 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "stdlib/blas/base/ddot.h" +#include "stdlib/blas/base/shared.h" + +static const CBLAS_INT M = 5; + +/** +* Computes the dot product of two double-precision floating-point vectors using alternative indexing semantics. 
+* +* @param N number of indexed elements +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return the dot product +*/ +double API_SUFFIX(c_ddot_ndarray)( const CBLAS_INT N, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + double dot; + CBLAS_INT ix; + CBLAS_INT iy; + CBLAS_INT m; + CBLAS_INT i; + + dot = 0.0; + if ( N <= 0 ) { + return dot; + } + ix = offsetX; + iy = offsetY; + + // If both strides are equal to `1`, use unrolled loops... + if ( strideX == 1 && strideY == 1 ) { + m = N % M; + + // If we have a remainder, do a clean-up loop... + if ( m > 0 ) { + for ( i = 0; i < m; i++ ) { + dot += X[ ix ] * Y[ iy ]; + ix += strideX; + iy += strideY; + } + } + if ( N < M ) { + return dot; + } + for ( i = m; i < N; i += M ) { + dot += ( X[ ix ]*Y[ iy ] ) + ( X[ ix+1 ]*Y[ iy+1 ] ) + ( X[ ix+2 ]*Y[ iy+2 ] ) + ( X[ ix+3 ]*Y[ iy+3 ] ) + ( X[ ix+4 ]*Y[ iy+4 ] ); + ix += M; + iy += M; + } + return dot; + } + for ( i = 0; i < N; i++ ) { + dot += X[ ix ] * Y[ iy ]; + ix += strideX; + iy += strideY; + } + return dot; +}