Skip to content

Commit

Permalink
Speed up putImageData for RGBA canvases
Browse files Browse the repository at this point in the history
  • Loading branch information
zbjornson committed Jun 13, 2020
1 parent fe186e5 commit 90561b1
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ project adheres to [Semantic Versioning](http://semver.org/).
==================
### Changed
* Switch CI to GitHub Actions. (Adds Windows and macOS builds.)
* Speed up `putImageData` for RGBA32 canvases.
### Added
* Export `rsvgVersion`.
### Fixed
Expand Down
24 changes: 23 additions & 1 deletion benchmarks/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* milliseconds to complete.
*/

var createCanvas = require('../').createCanvas
var {createCanvas, ImageData} = require('../')
var canvas = createCanvas(200, 200)
var largeCanvas = createCanvas(1000, 1000)
var ctx = canvas.getContext('2d')
Expand Down Expand Up @@ -64,6 +64,28 @@ function done (benchmark, times, start, isAsync) {

// node-canvas

// putImageData benchmarks. Each one draws a 200x200 ImageData at the origin
// of `ctx`; the three inputs differ only in the bytes (and therefore the
// alpha values) they contain.

// Data is left exactly as the ImageData constructor produced it; per the
// benchmark label this is expected to be the "every alpha is 0" case.
const id0 = new ImageData(200, 200)

bm('putImageData, all a=0', function () {
ctx.putImageData(id0, 0, 0)
})

// Every byte, and so every alpha value, is set to 0xFF.
const id255 = new ImageData(200, 200)
id255.data.fill(0xFF)

bm('putImageData, all a=0xFF', function () {
ctx.putImageData(id255, 0, 0)
})

// Every byte, alpha included, is assigned an independent random value so
// that pixels carry a mix of alpha values.
const idRand = new ImageData(200, 200)
for (let i = 0; i < idRand.data.length; i++) {
idRand.data[i] = 255 * Math.random()
}

bm('putImageData, mixed a', function () {
ctx.putImageData(idRand, 0, 0)
})

bm('fillStyle= name', function () {
ctx.fillStyle = 'transparent'
})
Expand Down
95 changes: 66 additions & 29 deletions src/CanvasRenderingContext2d.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,29 @@

using namespace v8;

// Windows doesn't support the C99 names for these
#ifdef _MSC_VER
#define isnan(x) _isnan(x)
#define isinf(x) (!_finite(x))
// Windows doesn't support the C99 names for these. TODO: these macros are
// unnecessary; use std::isnan and std::isinf instead.
# define isnan(x) _isnan(x)
# define isinf(x) (!_finite(x))
# include <intrin.h>
# define bswap32 _byteswap_ulong
#else
# ifdef __x86_64__
# include <x86intrin.h>
# endif
# define bswap32 __builtin_bswap32
#endif

// Rotates the 32-bit value `n` right by `c` bit positions and returns the
// result. The count is reduced modulo the bit width of `n`, so a count of 0
// (or any multiple of the width) returns `n` unchanged.
//
// GCC has no portable _rotr intrinsic, so this is spelled as the shift/or
// rotate pattern that compilers are expected to recognize as a single
// rotate. See https://stackoverflow.com/a/776523/1218408
static inline uint32_t rotr(uint32_t n, unsigned int c) {
  const unsigned int widthMask = CHAR_BIT * sizeof(n) - 1;
  const unsigned int rightShift = c & widthMask;
  // Complementary count, computed as (0 - rightShift) modulo the width so it
  // can never equal the full width (shifting by the full width is undefined).
  const unsigned int leftShift = (0u - rightShift) & widthMask;
  const uint32_t lowPart = n >> rightShift;
  const uint32_t wrappedPart = n << leftShift;
  return lowPart | wrappedPart;
}

#ifndef isnan
#define isnan(x) std::isnan(x)
#define isinf(x) std::isinf(x)
Expand Down Expand Up @@ -852,32 +869,52 @@ NAN_METHOD(Context2d::PutImageData) {
for (int y = 0; y < rows; ++y) {
uint8_t *dstRow = dst;
uint8_t *srcRow = src;
for (int x = 0; x < cols; ++x) {
// rgba
uint8_t r = *srcRow++;
uint8_t g = *srcRow++;
uint8_t b = *srcRow++;
uint8_t a = *srcRow++;

// argb
// performance optimization: fully transparent/opaque pixels can be
// processed more efficiently.
#if defined(__x86_64__) || defined(_M_X64)
int x = 0;
for (; x < cols - 2; x += 2) {
__m128i px;
memcpy(&px, srcRow, 8); // gcc doesn't define _mm_loadu_si64
px = _mm_unpacklo_epi8(px, _mm_setzero_si128());
// rgba -> bgra
px = _mm_shufflelo_epi16(px, 0b11000110);
px = _mm_shufflehi_epi16(px, 0b11000110);
// broadcast alpha
__m128i av = _mm_shufflelo_epi16(px, 0b11111111);
av = _mm_shufflehi_epi16(av, 0b11111111);
// Set alpha channel to 255 to undo upcoming division by 255
av = _mm_and_si128(av, _mm_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0, 0xFFFF, 0xFFFF, 0xFFFF, 0));
av = _mm_or_si128(av, _mm_setr_epi16(0, 0, 0, 255, 0, 0, 0, 255));
px = _mm_mullo_epi16(px, av);
// divide by 255
px = _mm_mulhi_epu16(px, _mm_set1_epi16(0x8081));
px = _mm_srli_epi16(px, 7);
// pack int16 to int8
px = _mm_packus_epi16(px, px);
memcpy(dstRow, &px, 8);
dstRow += 8;
srcRow += 8;
}
if (x < cols) {
#else
for (int x = 0; x < cols; x++) {
#endif
uint32_t c;
memcpy(&c, srcRow, 4); // rgba (LE)
srcRow += 4;
uint32_t a = c >> 24;
if (a == 0) {
*dstRow++ = 0;
*dstRow++ = 0;
*dstRow++ = 0;
*dstRow++ = 0;
} else if (a == 255) {
*dstRow++ = b;
*dstRow++ = g;
*dstRow++ = r;
*dstRow++ = a;
uint32_t zero = 0;
memcpy(dstRow, &zero, 4);
} else if (a == 255) { // rgba (LE)
c = bswap32(c); // abgr
c = rotr(c, 8); // bgra
memcpy(dstRow, &c, 4);
} else {
float alpha = (float)a / 255;
*dstRow++ = b * alpha;
*dstRow++ = g * alpha;
*dstRow++ = r * alpha;
*dstRow++ = a;
uint8_t r = (c & 0xFF) * a / 255;
uint8_t g = (c >> 8 & 0xFF) * a / 255;
uint8_t b = (c >> 16 & 0xFF) * a / 255;
uint32_t bgra = (a << 24) | (r << 16) | (g << 8) | b;
memcpy(dstRow, &bgra, 4);
}
}
dst += dstStride;
Expand All @@ -892,13 +929,13 @@ NAN_METHOD(Context2d::PutImageData) {
uint8_t *dstRow = dst;
uint8_t *srcRow = src;
for (int x = 0; x < cols; ++x) {
// rgba
// rgb[a]
uint8_t r = *srcRow++;
uint8_t g = *srcRow++;
uint8_t b = *srcRow++;
srcRow++;

// argb
// bgra
*dstRow++ = b;
*dstRow++ = g;
*dstRow++ = r;
Expand Down

0 comments on commit 90561b1

Please sign in to comment.