Skip to content

Commit

Permalink
fix OggFLac header management, add test, simplify binary search
Browse files Browse the repository at this point in the history
  • Loading branch information
philippe44 committed Feb 1, 2024
1 parent 60dd183 commit 6ea9880
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 75 deletions.
54 changes: 35 additions & 19 deletions src/ogf.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ typedef struct {
uint64_t granule_pos;
uint32_t serialno, page_num;
uint32_t checksum;
uint8_t segments, table;
uint8_t segments;
} ogg_header_t;

typedef struct {
ogg_header_t ogg;
uint8_t type;
char signature[4];
uint8_t maj;
Expand All @@ -49,7 +48,7 @@ typedef struct {
uint8_t sample_count[4];
uint8_t md5[16];
} streaminfo;
} ogg_page_t;
} flac_page_t;
#pragma pack(pop)

uint32_t compute_crc32(uint8_t *data, size_t n);
Expand Down Expand Up @@ -496,48 +495,55 @@ ogf_find_frame_return_info(PerlIO *infile, char *file, int offset, HV *info)
// finally adjust STREAMINFO header
if (frame_offset >= 0) {
Buffer buf;
ogg_page_t *page;
flac_page_t *page;
ogg_header_t *header;
uint32_t audio_offset = SvIV( *(my_hv_fetch( info, "audio_offset" )) );

// don't understand why my mp4.c does not seek here...
PerlIO_seek(infile, 0, SEEK_SET);
buffer_init(&buf, OGG_MAX_PAGE_SIZE + OGG_HEADER_SIZE);

// there is only one segment in header
_check_buf(infile, &buf, OGG_MAX_PAGE_SIZE, OGG_MAX_PAGE_SIZE + OGG_HEADER_SIZE);
page = buffer_ptr(&buf);
header = buffer_ptr(&buf);
page = buffer_ptr(&buf) + sizeof(*header) + 1;

DEBUG_TRACE("now reading vorbis commend\n");
DEBUG_TRACE("now reading vorbis comment\n");

// 1st page is 1st packet and with single lacing value
if (!strncmp(page->ogg.tag, "OggS", 4) && page->type == 0x7f &&
if (!strncmp(header->tag, "OggS", 4) && page->type == 0x7f &&
!strncmp(page->signature, "FLAC", 4) && !strncmp(page->header.tag, "fLaC", 4)) {
SV* seek_header = newSVpv("", 0);
int page_count = 0;
bool done = false;
off_t page_len = sizeof(*page);
off_t page_len = sizeof(*header) + 1 + sizeof(*page);

page->streaminfo.combo[3] &= 0xf0;
memset(page->streaminfo.sample_count, 0, sizeof(page->streaminfo.sample_count));
memset(page->streaminfo.md5, 0, sizeof(page->streaminfo.md5));
page->num_headers = __le32toh__(1);
page->ogg.checksum = 0;
page->ogg.checksum = __le32toh__(compute_crc32((uint8_t*) page, sizeof(*page)));
page->num_headers = 1;
#if (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__)
page->num_headers <<= 8;
#endif
header->checksum = 0;
header->checksum = __le32toh__(compute_crc32(buffer_ptr(&buf), page_len));

// store the updated OggFlac first packet/page (same in this case)
sv_catpvn( seek_header, (char*) buffer_ptr(&buf), page_len);
sv_catpvn( seek_header, (char*) buffer_ptr(&buf), page_len);

//now we need to keep the 1st page (vorbis comment) and the rest is useless
// now we need to keep the 1st page (vorbis comment) and the rest is useless
do {
int i;
uint8_t *ptr;
ogg_header_t *header;

// replenish what we consumed so that we have a full buffer
buffer_consume(&buf, page_len);
_check_buf(infile, &buf, page_len, page_len);
page_len = 0;

header = buffer_ptr(&buf);

ptr = buffer_ptr(&buf) + sizeof(*header) + 1;

// make sure this is a page
if (memcmp(header->tag, "OggS", 4)) {
PerlIO_printf(PerlIO_stderr(), "error reading vorbis comment (%s)\n", file);
Expand All @@ -548,14 +554,24 @@ ogf_find_frame_return_info(PerlIO *infile, char *file, int offset, HV *info)

if (header->granule_pos == ULLONG_MAX) {
page_len = header->segments * 255;
} else for (ptr = &header->table, i = 0; i < header->segments && !done; i++, ptr++) {
} else for (ptr = buffer_ptr(&buf) + sizeof(*header), i = 0; i < header->segments && !done; i++, ptr++) {
page_len += *ptr;
if (*ptr != 255) done = true;
}

page_len += sizeof(*header) + header->segments;

// this is the last flac header, need to set VORBIS_COMMENT as last header and update crc
if (page_count++ == 0) {
ptr = buffer_ptr(&buf) + sizeof(*header) + header->segments;
*ptr = 0x80 | FLAC_TYPE_VORBIS_COMMENT;
header->checksum = 0;
header->checksum = __le32toh__(compute_crc32(buffer_ptr(&buf), page_len));
DEBUG_TRACE("found vorbis comment header\n", page_len, header->segments);
}

page_len += sizeof(*header) + header->segments - 1;

sv_catpvn( seek_header, (char*) buffer_ptr(&buf), page_len );
DEBUG_TRACE("adding a page len:%d of %d segments\n", page_len, header->segments);
DEBUG_TRACE("adding page %d of len:%d with %d segments\n", page_count, page_len, header->segments);
} while (!done);

my_hv_store( info, "seek_header", seek_header );
Expand Down
117 changes: 63 additions & 54 deletions src/ogg.c
Original file line number Diff line number Diff line change
Expand Up @@ -559,12 +559,11 @@ _ogg_binary_search_sample(PerlIO *infile, char *file, HV *info, uint64_t target_
buffer_init(&buf, OGG_MAX_PAGE_SIZE + OGG_HEADER_SIZE);

while (high > low) {
off_t mid, extend;
off_t mid;
off_t page_start_offset = -1;
uint32_t cur_serialno;
int i; // Used by macro CONVERT_INT32LE
bool inner;


// no point dividing the buffer in half if we don't potentially have a full header in each half
if (high - low > 2 * OGG_HEADER_SIZE) {
mid = low + ((high - low) / 2);
Expand All @@ -580,60 +579,42 @@ _ogg_binary_search_sample(PerlIO *infile, char *file, HV *info, uint64_t target_
goto out;
}

for (granule_pos = ULLONG_MAX, inner = true, extend = 0; granule_pos == ULLONG_MAX && inner;)
{
// this is where we extend to previous lower side if needed
if (PerlIO_seek(infile, mid + extend, SEEK_SET) == -1) {
frame_offset = -1;
goto out;
if (PerlIO_seek(infile, mid, SEEK_SET) == -1) {
frame_offset = -1;
goto out;
}

buffer_clear(&buf);

// Worst case is:
// ....OggS...<OGG_MAX_PAGE_SIZE>...OggS
// ^-mid ^-high
//
// To handle this, read OGG_HEADER_SIZE bytes extra after 'high'
// so that we find the header that starts just before 'high'.
// Still, the actual granule might be in the lower side of
// the previous interval, so we'll need to get it later
if (!_check_buf(infile, &buf, OGG_HEADER_SIZE,
MIN(OGG_MAX_PAGE_SIZE, high - mid) + OGG_HEADER_SIZE)) {
frame_offset = -1;
goto out;
}

for (bptr = buffer_ptr(&buf), buf_size = buffer_len(&buf); ; ++bptr, --buf_size) {
if (buf_size < 4) {
// no page start found, force exit of outer loop
DEBUG_TRACE(" no OggS in current buffer\n");
break;
}

buffer_clear(&buf);

// Worst case is:
// ....OggS...<OGG_MAX_PAGE_SIZE>...OggS
// ^-mid ^-high
//
// To handle this, read OGG_HEADER_SIZE bytes extra after 'high'
// so that we find the header that starts just before 'high'.
// Still, the actual granuler might be in the lower side of
// the previous interval, so we need to extend our reach up
// to there if we still have no granule
if (!_check_buf(infile, &buf, OGG_HEADER_SIZE,
MIN(OGG_MAX_PAGE_SIZE, high - mid) + OGG_HEADER_SIZE)) {
frame_offset = -1;
goto out;
if (bptr[0] != 'O' || bptr[1] != 'g' || bptr[2] != 'g' || bptr[3] != 'S') {
continue;
}

for (bptr = buffer_ptr(&buf), buf_size = buffer_len(&buf); ; ++bptr, --buf_size) {
if (buf_size < 4) {
// no page start found, force exit of outer loop
DEBUG_TRACE(" no OggS in current buffer\n");
inner = false;
break;
}
page_start_offset = buffer_len(&buf) - buf_size;
frame_offset = mid + page_start_offset;

if (bptr[0] != 'O' || bptr[1] != 'g' || bptr[2] != 'g' || bptr[3] != 'S') {
continue;
}

// Read granule_pos for this packet
granule_pos = (uint64_t)CONVERT_INT32LE((bptr + 6));
granule_pos |= (uint64_t)CONVERT_INT32LE((bptr + 10)) << 32;
if (granule_pos != ULLONG_MAX) {
page_start_offset = buffer_len(&buf) - buf_size;
frame_offset = mid + page_start_offset;
DEBUG_TRACE(" found OggS with offset %d (extend:%d)\n", page_start_offset, extend);
} else {
// if no packet ends here, then this is a full set of segments with 255 bytes each
uint16_t segments = *(bptr + 26);
// let's start spot-on
extend += OGG_HEADER_SIZE + segments - 1 + segments * 255 + buffer_len(&buf) - buf_size;
DEBUG_TRACE(" unusable granule, next page in %hu (extend:%d, bufsize:%d)\n", segments * 255, extend, buf_size);
}

break;
}
break;
}

if (page_start_offset < 0) {
Expand All @@ -643,7 +624,10 @@ _ogg_binary_search_sample(PerlIO *infile, char *file, HV *info, uint64_t target_
continue;
}

DEBUG_TRACE(" checking frame at %d\n", frame_offset);
buffer_consume(&buf, page_start_offset);
bptr = buffer_ptr(&buf);
granule_pos = (uint64_t)CONVERT_INT32LE((bptr + 6));
granule_pos |= (uint64_t)CONVERT_INT32LE((bptr + 10)) << 32;

// Also read serial number, if this ever changes within a file it is a chained
// file and we can't seek
Expand All @@ -654,6 +638,31 @@ _ogg_binary_search_sample(PerlIO *infile, char *file, HV *info, uint64_t target_
goto out;
}

// if we are not on a usable granule, we need to grab it and it might be far ahead
while (granule_pos == ULLONG_MAX) {
uint16_t segments = *(bptr + 26);
int page_len = OGG_HEADER_SIZE + segments - 1 + segments * 255;
DEBUG_TRACE(" landed on unusable granule, next page in %hu (avail:%d)\n", page_len, buffer_len(&buf));

// replenish enough to get next ogg header and then consume the current page
if (buffer_len(&buf) < page_len + OGG_HEADER_SIZE) _check_buf(infile, &buf, page_len + OGG_HEADER_SIZE, OGG_MAX_PAGE_SIZE);
buffer_consume(&buf, page_len);
bptr = buffer_ptr(&buf);

// safety measure
if (memcmp(bptr, "OggS", 4)) {
PerlIO_printf(PerlIO_stderr(), "error searching for usable granule: %s\n", file);
frame_offset = -1;
goto out;
}

// now evaluate if we finally reached a usable granule (we must be aligned)
granule_pos = (uint64_t)CONVERT_INT32LE((bptr + 6));
granule_pos |= (uint64_t)CONVERT_INT32LE((bptr + 10)) << 32;
}

DEBUG_TRACE(" checking frame at %d\n", frame_offset);

if (granule_pos > target_sample) {
best_frame_offset = frame_offset;
DEBUG_TRACE(" searching lower (best:%d)\n", best_frame_offset);
Expand All @@ -669,7 +678,7 @@ _ogg_binary_search_sample(PerlIO *infile, char *file, HV *info, uint64_t target_
break;
}
}

frame_offset = best_frame_offset;

out:
Expand Down
31 changes: 29 additions & 2 deletions t/ogf.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use strict;

use File::Spec::Functions;
use FindBin ();
use Test::More tests => 34;
use Test::More tests => 45;

use Audio::Scan;

Expand Down Expand Up @@ -78,8 +78,35 @@ eval {
my $info = Audio::Scan->find_frame_fh_return_info( ogf => $fh, 500 );

is( $info->{audio_offset}, 106744, 'Audio offset ok' );
is( $info->{seek_offset}, 192576, 'Seek offset ok' );
is( $info->{seek_offset}, 193419, 'Seek offset ok' );
is( length $info->{seek_header}, 98411, 'Seek header ok' );

close $fh;

open $fh, '>', _f('headers.ogf');
binmode $fh;
print $fh $info->{seek_header};
close $fh;

my $s = Audio::Scan->scan( _f('headers.ogf') );
my $info = $s->{info};
my $tags = $s->{tags};

is( $info->{bits_per_sample}, 16, 'Bits per sample ok' );
is( $info->{channels}, 2, 'Channels ok' );
is( $info->{maximum_blocksize}, 4096, 'Max blocksize ok' );
is( $info->{maximum_framesize}, 16394, 'Max framesize ok' );
is( $info->{audio_md5}, '00000000000000000000000000000000', 'MD5 ok' );
is( $info->{minimum_blocksize}, 4096, 'Min blocksize ok' );
is( $info->{minimum_framesize}, 12572, 'Min framesize ok' );
is( $info->{samplerate}, 44100, 'Samplerate ok' );
is( $info->{song_length_ms}, 0, 'Song length ok' );
is( $info->{total_samples}, 0, 'Total samples ok' );

is( $tags->{VENDOR}, 'reference libFLAC 1.2.1 20070917', 'VENDOR ok' );

unlink _f('headers.ogf');

}

sub _f {
Expand Down

0 comments on commit 6ea9880

Please sign in to comment.