diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92e63fbfa..36813f070 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ project adheres to [Semantic Versioning](http://semver.org/).
 ==================
 ### Changed
 * Switch CI to Github Actions. (Adds Windows and macOS builds.)
+* Speed up `putImageData` for RGBA32 canvases.
 ### Added
 * Export `rsvgVersion`.
 ### Fixed
diff --git a/benchmarks/run.js b/benchmarks/run.js
index 4914ea97b..58ca2e066 100644
--- a/benchmarks/run.js
+++ b/benchmarks/run.js
@@ -4,7 +4,7 @@
  * milliseconds to complete.
  */
 
-var createCanvas = require('../').createCanvas
+var {createCanvas, ImageData} = require('../')
 var canvas = createCanvas(200, 200)
 var largeCanvas = createCanvas(1000, 1000)
 var ctx = canvas.getContext('2d')
@@ -64,6 +64,28 @@ function done (benchmark, times, start, isAsync) {
 
 // node-canvas
 
+const id0 = new ImageData(200, 200)
+
+bm('putImageData, all a=0', function () {
+  ctx.putImageData(id0, 0, 0)
+})
+
+const id255 = new ImageData(200, 200)
+id255.data.fill(0xFF)
+
+bm('putImageData, all a=0xFF', function () {
+  ctx.putImageData(id255, 0, 0)
+})
+
+const idRand = new ImageData(200, 200)
+for (let i = 0; i < idRand.data.length; i++) {
+  idRand.data[i] = 255 * Math.random()
+}
+
+bm('putImageData, mixed a', function () {
+  ctx.putImageData(idRand, 0, 0)
+})
+
 bm('fillStyle= name', function () {
   ctx.fillStyle = 'transparent'
 })
diff --git a/src/CanvasRenderingContext2d.cc b/src/CanvasRenderingContext2d.cc
index 774612708..5c4f6726f 100644
--- a/src/CanvasRenderingContext2d.cc
+++ b/src/CanvasRenderingContext2d.cc
@@ -21,12 +21,29 @@
 
 using namespace v8;
 
-// Windows doesn't support the C99 names for these
 #ifdef _MSC_VER
-#define isnan(x) _isnan(x)
-#define isinf(x) (!_finite(x))
+// Windows doesn't support the C99 names for these. TODO unnecessary,
+// should be using std::isnan.
+#  define isnan(x) _isnan(x)
+#  define isinf(x) (!_finite(x))
+#  include <intrin.h>
+#  define bswap32 _byteswap_ulong
+#else
+#  ifdef __x86_64__
+#    include <x86intrin.h>
+#  endif
+#  define bswap32 __builtin_bswap32
 #endif
 
+static inline uint32_t rotr(uint32_t n, unsigned int c) {
+  // GCC has no portable _rotr intrinsic, so rely on idiom recognition. Works
+  // for all supported versions of MSVC, GCC x86, GCC ARM, Clang.
+  // https://stackoverflow.com/a/776523/1218408
+  const unsigned int mask = CHAR_BIT * sizeof(n) - 1;
+  c &= mask;
+  return (n >> c) | (n << ((~c + 1) & mask));
+}
+
 #ifndef isnan
 #define isnan(x) std::isnan(x)
 #define isinf(x) std::isinf(x)
@@ -852,32 +869,53 @@ NAN_METHOD(Context2d::PutImageData) {
     for (int y = 0; y < rows; ++y) {
       uint8_t *dstRow = dst;
       uint8_t *srcRow = src;
-      for (int x = 0; x < cols; ++x) {
-        // rgba
-        uint8_t r = *srcRow++;
-        uint8_t g = *srcRow++;
-        uint8_t b = *srcRow++;
-        uint8_t a = *srcRow++;
-
-        // argb
-        // performance optimization: fully transparent/opaque pixels can be
-        // processed more efficiently.
+#if defined(__x86_64__) || defined(_M_X64)
+      int x = 0;
+      for (; x < cols - 1; x += 2) { // two pixels per pass; needs x + 1 < cols
+        __m128i px;
+        memcpy(&px, srcRow, 8); // gcc doesn't define _mm_loadu_si64
+        px = _mm_unpacklo_epi8(px, _mm_setzero_si128());
+        // rgba -> bgra
+        px = _mm_shufflelo_epi16(px, 0b11000110);
+        px = _mm_shufflehi_epi16(px, 0b11000110);
+        // broadcast alpha
+        __m128i av = _mm_shufflelo_epi16(px, 0b11111111);
+        av = _mm_shufflehi_epi16(av, 0b11111111);
+        // Set alpha channel to 255 to undo upcoming division by 255
+        av = _mm_and_si128(av, _mm_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0, 0xFFFF, 0xFFFF, 0xFFFF, 0));
+        av = _mm_or_si128(av, _mm_setr_epi16(0, 0, 0, 255, 0, 0, 0, 255));
+        px = _mm_mullo_epi16(px, av);
+        // divide by 255
+        px = _mm_mulhi_epu16(px, _mm_set1_epi16(0x8081));
+        px = _mm_srli_epi16(px, 7);
+        // pack int16 to int8
+        px = _mm_packus_epi16(px, px);
+        memcpy(dstRow, &px, 8);
+        dstRow += 8;
+        srcRow += 8;
+      }
+      if (x < cols) { // odd width: exactly one trailing pixel remains
+#else
+      for (int x = 0; x < cols; x++) {
+#endif
+        uint32_t c;
+        memcpy(&c, srcRow, 4); // rgba (LE)
+        srcRow += 4;
+        uint32_t a = c >> 24;
         if (a == 0) {
-          *dstRow++ = 0;
-          *dstRow++ = 0;
-          *dstRow++ = 0;
-          *dstRow++ = 0;
-        } else if (a == 255) {
-          *dstRow++ = b;
-          *dstRow++ = g;
-          *dstRow++ = r;
-          *dstRow++ = a;
+          uint32_t zero = 0;
+          memcpy(dstRow, &zero, 4);
+        } else if (a == 255) { // rgba (LE)
+          c = bswap32(c); // abgr
+          c = rotr(c, 8); // bgra
+          memcpy(dstRow, &c, 4);
         } else {
-          float alpha = (float)a / 255;
-          *dstRow++ = b * alpha;
-          *dstRow++ = g * alpha;
-          *dstRow++ = r * alpha;
-          *dstRow++ = a;
+          uint8_t r = (c & 0xFF) * a / 255;
+          uint8_t g = (c >> 8 & 0xFF) * a / 255;
+          uint8_t b = (c >> 16 & 0xFF) * a / 255;
+          uint32_t bgra = (a << 24) | (r << 16) | (g << 8) | b;
+          memcpy(dstRow, &bgra, 4);
         }
+        dstRow += 4; // advance destination; the non-SSE loop depends on this
       }
       dst += dstStride;
@@ -892,13 +930,13 @@ NAN_METHOD(Context2d::PutImageData) {
       uint8_t *dstRow = dst;
       uint8_t *srcRow = src;
       for (int x = 0; x < cols; ++x) {
-        // rgba
+        // rgb[a]
         uint8_t r = *srcRow++;
         uint8_t g = *srcRow++;
         uint8_t b = *srcRow++;
         srcRow++;
 
-        // argb
+        // bgra
         *dstRow++ = b;
         *dstRow++ = g;
         *dstRow++ = r;