diff --git a/HCADecodeService.cpp b/HCADecodeService.cpp index 6b8a6a8..2edecbf 100644 --- a/HCADecodeService.cpp +++ b/HCADecodeService.cpp @@ -10,7 +10,7 @@ HCADecodeService::HCADecodeService() chunksize{ 24 }, workersem{ new Semaphore[this->numthreads]{} }, datasem{ 0 }, - mainsem{ this->numthreads }, + mainsem{ 0 }, numchannels{ 0 }, workingrequest{ nullptr }, shutdown{ false }, @@ -31,7 +31,7 @@ HCADecodeService::HCADecodeService(unsigned int numthreads, unsigned int chunksi chunksize{ chunksize ? chunksize : 24 }, workersem{ new Semaphore[this->numthreads]{} }, datasem{ 0 }, - mainsem{ this->numthreads }, + mainsem{ 0 }, numchannels{ 0 }, workingrequest{ nullptr }, shutdown{ false }, @@ -172,7 +172,6 @@ void HCADecodeService::Main_Thread() } mainsem.wait(numthreads); - wait_on_all_threads(mainsem); workingrequest = nullptr; @@ -221,12 +220,6 @@ void HCADecodeService::populate_block_list() } } -void HCADecodeService::wait_on_all_threads(Semaphore &sem) -{ - sem.wait(numthreads); - sem.notify(numthreads); -} - void HCADecodeService::join_workers() { for (unsigned int i = 0; i < numthreads; ++i) diff --git a/HCADecodeService.h b/HCADecodeService.h index 1a1d236..e23daae 100644 --- a/HCADecodeService.h +++ b/HCADecodeService.h @@ -25,7 +25,6 @@ class HCADecodeService void Decode_Thread(unsigned int id); void load_next_request(); void populate_block_list(); - void wait_on_all_threads(Semaphore &sem); void join_workers(); clHCA workingfile; diff --git a/clHCA.cpp b/clHCA.cpp index 15dd6cb..3ffc83b 100644 --- a/clHCA.cpp +++ b/clHCA.cpp @@ -13,23 +13,17 @@ // インライン関数 //-------------------------------------------------- #ifdef _MSC_VER -#include -// MSVC does not optimize these functions to bswap even on -O2 -inline short bswap(short v) { return _byteswap_ushort(v); } -inline unsigned short bswap(unsigned short v) { return _byteswap_ushort(v); } -inline int bswap(int v) { return _byteswap_ulong(v); } -inline unsigned int bswap(unsigned int v) { return _byteswap_ulong(v); } -inline long long bswap(long long v) { return _byteswap_uint64(v); } -inline unsigned long long bswap(unsigned long long v) { return _byteswap_uint64(v); } -#else -// gcc and clang optimize these functions to bswap instructions -inline short bswap(short v) { short r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } -inline unsigned short bswap(unsigned short v) { unsigned short r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } -inline int bswap(int v) { int r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } -inline unsigned int bswap(unsigned int v) { unsigned int r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } -inline long long bswap(long long v) { long long r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } -inline unsigned long long bswap(unsigned long long v) { unsigned long long r = v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; r <<= 8; v >>= 8; r |= v & 0xFF; return r; } +#define __builtin_bswap16 _byteswap_ushort +#define __builtin_bswap32 _byteswap_ulong +#define __builtin_bswap64 _byteswap_uint64 #endif // _MSC_VER +inline short bswap(short v) { return __builtin_bswap16(v); } +inline unsigned short bswap(unsigned short v) { return __builtin_bswap16(v); } +inline int bswap(int v) { return __builtin_bswap32(v); } +inline unsigned int bswap(unsigned int v) { return __builtin_bswap32(v); } +inline long long bswap(long long v) { return __builtin_bswap64(v); } +inline unsigned long long bswap(unsigned long long v) { return __builtin_bswap64(v); } + inline float bswap(float v) { unsigned int i = bswap(*(unsigned int *)&v); return *(float *)&i; } inline unsigned int ceil2(unsigned int a, unsigned int b) { return (b>0) ? (a / b + ((a%b) ? 1 : 0)) : 0; } template inline T clamp(T val, T min, T max) { return (val > max) ? max : (val < min) ? min : val; } @@ -687,13 +681,6 @@ void clHCA::AsyncDecode(stChannel *channels, float *wavebuffer, unsigned int blo if (stop) return; unsigned int samplesize = _mode >> 3; char *outwavptr = (char *)outputwavptr + (samplesize * blocknum * _channelCount << 10) + _wavheadersize; - if (blocknum == 0) - { - for (unsigned int i = 0; i < _channelCount; ++i) - { - memset(channels[i].wav2, 0, 512); // Clear previous IMDCT result - } - } unsigned int endblock = blocknum + chunksize; for (unsigned int currblock = blocknum ? blocknum - 1 : blocknum; currblock < endblock && currblock < _blockCount; ++currblock) { @@ -1087,10 +1074,10 @@ int clHCA::clData::CheckBit(int bitSize) { int v = 0; if (bitSize <= _size - _bit) { - static unsigned int mask[] = { 0xFFFFFF,0x7FFFFF,0x3FFFFF,0x1FFFFF,0x0FFFFF,0x07FFFF,0x03FFFF,0x01FFFF }; + static unsigned int mask[] = { 0xFFFFFF00,0xFFFF7F00,0xFFFF3F00,0xFFFF1F00,0xFFFF0F00,0xFFFF0700,0xFFFF0300,0xFFFF0100 }; unsigned int *data = (unsigned int *)&_data[_bit >> 3]; unsigned int shift_bits = _bit & 7; - v = bswap(*data) & mask[shift_bits]; + v = bswap(*data & mask[shift_bits]); v >>= 24 - shift_bits - bitSize; } return v; diff --git a/clHCA.vcxproj b/clHCA.vcxproj new file mode 100644 index 0000000..05a5ec3 --- /dev/null +++ b/clHCA.vcxproj @@ -0,0 +1,186 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {F91BD81C-9D95-42E5-BBA3-E0D2162F4144} + clHCA + 10.0.16299.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + $(IncludePath) + $(LibraryPath) + false + + + false + + + + Level3 + Disabled + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + + + Level3 + Disabled + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + + + Level2 + MaxSpeed + true + true + + + true + AnySuitable + + + + + Speed + true + true + StdCall + false + false + true + true + + + + + false + false + _HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) + MultiThreaded + + + true + true + UseLinkTimeCodeGeneration + %(AdditionalDependencies) + false + + + + + Level2 + MaxSpeed + true + true + + + true + AnySuitable + Speed + true + true + true + true + true + false + true + false + + false + + false + FastCall + _HAS_EXCEPTIONS=0;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + true + false + MultiThreaded + + + true + true + UseLinkTimeCodeGeneration + DebugFastLink + + + + + + + + + + + + + + + + \ No newline at end of file