diff --git a/Core/apu.c b/Core/apu.c index 3fb3bd23c..fe05a9b7a 100644 --- a/Core/apu.c +++ b/Core/apu.c @@ -6,6 +6,76 @@ #include #include "gb.h" +/* Band limited synthesis based on: http://www.slack.net/~ant/bl-synth/ */ +static int32_t band_limited_steps[GB_BAND_LIMITED_PHASES][GB_BAND_LIMITED_WIDTH]; + +static void __attribute__((constructor)) band_limited_init(void) +{ + const unsigned master_size = GB_BAND_LIMITED_WIDTH * GB_BAND_LIMITED_PHASES; + double *master = malloc(master_size * sizeof(*master)); + nounroll for (unsigned i = 0; i < master_size; i++) { + master[i] = 0.5; + } + + const unsigned sine_size = 256 * GB_BAND_LIMITED_PHASES + 2; + const unsigned max_harmonic = sine_size / 2 / GB_BAND_LIMITED_PHASES; + nounroll for (unsigned harmonic = 1; harmonic <= max_harmonic; harmonic += 2) { + double amplitude = 4 / M_PI / harmonic / 2; + double to_angle = M_PI * 2 / sine_size * harmonic; + nounroll for (unsigned i = 0; i < master_size; i++) { + master[i] += sin(((signed)i - (signed)master_size / 2) * to_angle) * amplitude; + } + } + + for (unsigned phase = 0; phase < GB_BAND_LIMITED_PHASES; phase++) { + int32_t error = GB_BAND_LIMITED_ONE; + int32_t prev = 0; + for (unsigned i = 0; i < GB_BAND_LIMITED_WIDTH; i++) { + int32_t cur = master[(GB_BAND_LIMITED_PHASES - 1 - phase) + i * GB_BAND_LIMITED_PHASES] * GB_BAND_LIMITED_ONE; + int32_t delta = cur - prev; + error = error - delta; + prev = cur; + band_limited_steps[phase][i] = delta; + } + + // Make sure the deltas sum to 1.0 + band_limited_steps[phase][GB_BAND_LIMITED_WIDTH / 2 - 1] += error / 2; + band_limited_steps[phase][0] += error - (error / 2); + } + free(master); +} + +static void band_limited_update(GB_band_limited_t *band_limited, const GB_sample_t *input, unsigned phase) +{ + if (input->packed == band_limited->input.packed) return; + unsigned delay = phase / GB_BAND_LIMITED_PHASES; + phase = phase & (GB_BAND_LIMITED_PHASES - 1); + + GB_sample_t delta = { + .left = input->left - band_limited->input.left, + .right = input->right - band_limited->input.right, + }; + band_limited->input.packed = input->packed; + + for (unsigned i = 0; i < GB_BAND_LIMITED_WIDTH; i++) { + unsigned offset = (i + band_limited->pos + delay) & (sizeof(band_limited->buffer) / sizeof(band_limited->buffer[0]) - 1); + band_limited->buffer[offset].left += delta.left * band_limited_steps[phase][i]; + band_limited->buffer[offset].right += delta.right * band_limited_steps[phase][i]; + } +} + +static void band_limited_read(GB_band_limited_t *band_limited, GB_sample_t *output, uint32_t multiplier) +{ + band_limited->output.left += band_limited->buffer[band_limited->pos].left; + band_limited->output.right += band_limited->buffer[band_limited->pos].right; + + band_limited->buffer[band_limited->pos].left = band_limited->buffer[band_limited->pos].right = 0; + band_limited->pos = (band_limited->pos + 1) & (sizeof(band_limited->buffer) / sizeof(band_limited->buffer[0]) - 1); + + output->left = band_limited->output.left * multiplier / GB_BAND_LIMITED_ONE; + output->right = band_limited->output.right * multiplier / GB_BAND_LIMITED_ONE; +} + static const uint8_t duties[] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, @@ -13,14 +83,6 @@ static const uint8_t duties[] = { 0, 1, 1, 1, 1, 1, 1, 0, }; -static void refresh_channel(GB_gameboy_t *gb, GB_channel_t index, unsigned cycles_offset) -{ - unsigned multiplier = gb->apu_output.cycles_since_render + cycles_offset - gb->apu_output.last_update[index]; - gb->apu_output.summed_samples[index].left += gb->apu_output.current_sample[index].left * multiplier; - gb->apu_output.summed_samples[index].right += gb->apu_output.current_sample[index].right * multiplier; - gb->apu_output.last_update[index] = gb->apu_output.cycles_since_render + cycles_offset; -} - bool GB_apu_is_DAC_enabled(GB_gameboy_t *gb, GB_channel_t index) { if (gb->model > GB_MODEL_CGB_E) { @@ -69,7 +131,6 @@ static uint8_t agb_bias_for_channel(GB_gameboy_t *gb, GB_channel_t index) static void update_sample(GB_gameboy_t *gb, GB_channel_t index, int8_t value, unsigned cycles_offset) { - if (gb->model > GB_MODEL_CGB_E) { /* On the AGB, because no analog mixing is done, the behavior of NR51 is a bit different. A channel that is not connected to a terminal is idenitcal to a connected channel @@ -100,10 +161,9 @@ static void update_sample(GB_gameboy_t *gb, GB_channel_t index, int8_t value, un output.left = output.right = 0; } - if (gb->apu_output.current_sample[index].packed != output.packed) { - refresh_channel(gb, index, cycles_offset); - gb->apu_output.current_sample[index] = output; - } + band_limited_update(&gb->apu_output.band_limited[index], + &output, + (gb->apu_output.cycles_since_render + cycles_offset) * GB_BAND_LIMITED_PHASES / gb->apu_output.max_cycles_per_sample); } return; @@ -131,10 +191,9 @@ static void update_sample(GB_gameboy_t *gb, GB_channel_t index, int8_t value, un if (likely(!gb->apu_output.channel_muted[index])) { output = (GB_sample_t){(0xF - value * 2) * left_volume, (0xF - value * 2) * right_volume}; } - if (gb->apu_output.current_sample[index].packed != output.packed) { - refresh_channel(gb, index, cycles_offset); - gb->apu_output.current_sample[index] = output; - } + band_limited_update(&gb->apu_output.band_limited[index], + &output, + (gb->apu_output.cycles_since_render + cycles_offset) * GB_BAND_LIMITED_PHASES / gb->apu_output.max_cycles_per_sample); } } @@ -210,20 +269,12 @@ static void render(GB_gameboy_t *gb) } } } + + GB_sample_t channel_output; + band_limited_read(&gb->apu_output.band_limited[i], &channel_output, multiplier); - if (likely(gb->apu_output.last_update[i] == 0 || gb->apu_output.cycles_since_render == 0)) { - output.left += gb->apu_output.current_sample[i].left * multiplier; - output.right += gb->apu_output.current_sample[i].right * multiplier; - } - else { - refresh_channel(gb, i, 0); - output.left += (signed long) gb->apu_output.summed_samples[i].left * multiplier - / gb->apu_output.cycles_since_render; - output.right += (signed long) gb->apu_output.summed_samples[i].right * multiplier - / gb->apu_output.cycles_since_render; - gb->apu_output.summed_samples[i] = (GB_sample_t){0, 0}; - } - gb->apu_output.last_update[i] = 0; + output.left += channel_output.left; + output.right += channel_output.right; } gb->apu_output.cycles_since_render = 0; @@ -295,7 +346,7 @@ static void render(GB_gameboy_t *gb) } } -static void update_square_sample(GB_gameboy_t *gb, GB_channel_t index) +static void update_square_sample(GB_gameboy_t *gb, GB_channel_t index, unsigned cycles) { if (gb->apu.square_channels[index].sample_surpressed) { if (gb->model > GB_MODEL_CGB_E) { @@ -308,7 +359,7 @@ static void update_square_sample(GB_gameboy_t *gb, GB_channel_t index) update_sample(gb, index, duties[gb->apu.square_channels[index].current_sample_index + duty * 8]? gb->apu.square_channels[index].current_volume : 0, - 0); + cycles); } static inline void update_wave_sample(GB_gameboy_t *gb, unsigned cycles) @@ -424,7 +475,7 @@ static void tick_square_envelope(GB_gameboy_t *gb, GB_channel_t index) } if (gb->apu.is_active[index]) { - update_square_sample(gb, index); + update_square_sample(gb, index, 0); } } @@ -785,7 +836,7 @@ restart:; gb->apu.pcm_mask[0] &= i == GB_SQUARE_1? 0xF0 : 0x0F; } gb->apu.square_channels[i].did_tick = true; - update_square_sample(gb, i); + update_square_sample(gb, i, cycles - cycles_left); uint8_t duty = gb->io_registers[i == GB_SQUARE_1? GB_IO_NR11 :GB_IO_NR21] >> 6; uint8_t edge_sample_index = inline_const(uint8_t[], {7, 7, 5, 1})[duty]; @@ -1250,7 +1301,7 @@ void GB_apu_write(GB_gameboy_t *gb, uint8_t reg, uint8_t value) nrx2_glitch(gb, &gb->apu.square_channels[index].current_volume, value, gb->io_registers[reg], &gb->apu.square_channels[index].volume_countdown, &gb->apu.square_channels[index].envelope_clock); - update_square_sample(gb, index); + update_square_sample(gb, index, 0); } break; @@ -1335,7 +1386,7 @@ void GB_apu_write(GB_gameboy_t *gb, uint8_t reg, uint8_t value) started sound). The playback itself is not instant which is why we don't update the sample for other cases. */ if (gb->apu.is_active[index]) { - update_square_sample(gb, index); + update_square_sample(gb, index, 0); } gb->apu.square_channels[index].volume_countdown = gb->io_registers[index == GB_SQUARE_1 ? GB_IO_NR12 : GB_IO_NR22] & 7; diff --git a/Core/apu.h b/Core/apu.h index 312f88452..59f365d89 100644 --- a/Core/apu.h +++ b/Core/apu.h @@ -5,7 +5,12 @@ #include #include "defs.h" +#define GB_BAND_LIMITED_WIDTH 16 +#define GB_BAND_LIMITED_PHASES 32 + #ifdef GB_INTERNAL +#define GB_BAND_LIMITED_ONE 0x10000 // fixed point value equal to 1 + /* Speed = 1 / Length (in seconds) */ #define DAC_DECAY_SPEED 20000 #define DAC_ATTACK_SPEED 20000 @@ -165,17 +170,22 @@ typedef enum { GB_AUDIO_FORMAT_WAV, } GB_audio_format_t; +typedef struct { + struct { + int32_t left, right; + } buffer[GB_BAND_LIMITED_WIDTH * 2], output; + uint8_t pos; + GB_sample_t input; +} GB_band_limited_t; + typedef struct { unsigned sample_rate; unsigned sample_cycles; // Counts by sample_rate until it reaches the clock frequency unsigned max_cycles_per_sample; - // Samples are NOT normalized to MAX_CH_AMP * 4 at this stage! - unsigned cycles_since_render; - unsigned last_update[GB_N_CHANNELS]; - GB_sample_t current_sample[GB_N_CHANNELS]; - GB_sample_t summed_samples[GB_N_CHANNELS]; + uint32_t cycles_since_render; + GB_band_limited_t band_limited[GB_N_CHANNELS]; double dac_discharge[GB_N_CHANNELS]; bool channel_muted[GB_N_CHANNELS]; bool edge_triggered[GB_N_CHANNELS];