Skip to content

Commit

Permalink
Fix long encoding/decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
jonhkr committed Jan 23, 2025
1 parent fcf815e commit 2de9b2f
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 23 deletions.
97 changes: 76 additions & 21 deletions lib/avro/datum.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,79 @@ public function __construct($expected_schema, $datum)
}
}

/**
 * Zigzag + variable-length (base-128) encoding and decoding of longs.
 * https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
 *
 * @package Avro
 */
class Zigzag
{
    /**
     * Logical (zero-filling) right shift; PHP has no `>>>` operator and
     * its `>>` is arithmetic (sign-extending).
     *
     * @param int $n value to shift
     * @param int $x number of bit positions to shift by; a negative value
     *               raises ArithmeticError from the underlying `>>`
     *
     * @return int $n shifted right by $x bits with zeros shifted in from the
     *             left; 0 when $x is at least the platform integer width
     */
    public static function unsigned_right_shift(int $n, int $x): int
    {
        $platform_bits = PHP_INT_SIZE * 8;

        // Nothing to shift: return early instead of relying on the result
        // of a left shift by the full integer width further down.
        if ($x === 0) {
            return $n;
        }

        // Every bit is shifted out.
        if ($x >= $platform_bits) {
            return 0;
        }

        // -1 is all ones; shifting it left by (width - $x) leaves exactly the
        // $x left-most bits set, and inverting that keeps only the low
        // (width - $x) bits. ANDing clears the sign-extension bits that the
        // arithmetic `>>` copies in when $n is negative, and is a no-op when
        // $n is non-negative.
        $keep_mask = ~(-1 << ($platform_bits - $x));

        return ($n >> $x) & $keep_mask;
    }

    /**
     * @param int|string $n
     * @return string long $n encoded as bytes
     * @internal This relies on 64-bit PHP.
     */
    public static function encode_long($n): string
    {
        $n = (int) $n;

        // Zigzag: interleave negative and non-negative values so that small
        // magnitudes of either sign produce few bytes.
        $n = ($n << 1) ^ ($n >> 63);

        $str = '';
        // Emit 7 bits per byte, least significant group first; the high bit
        // of each byte flags that another byte follows. The mask test works
        // for a negative $n as well, and after the first unsigned shift $n
        // is non-negative.
        while (($n & ~0x7F) !== 0) {
            $str .= chr(($n & 0x7F) | 0x80);
            $n = self::unsigned_right_shift($n, 7);
        }

        return $str . chr($n);
    }

    /**
     * Decodes a zigzag, variable-length encoded long.
     *
     * Bytes after the terminating byte (the first one without the 0x80
     * continuation bit) are ignored.
     *
     * @param array $bytes byte values as integers, least significant group first
     *
     * @return int the decoded long
     *
     * @throws \InvalidArgumentException if $bytes is empty, ends while the
     *         continuation bit is still set, or holds more groups than fit
     *         in a platform integer
     */
    public static function decode_long(array $bytes): int
    {
        $platform_bits = PHP_INT_SIZE * 8;
        $n = 0;
        $shift = 0;
        $terminated = false;

        foreach ($bytes as $b) {
            if ($shift >= $platform_bits) {
                throw new \InvalidArgumentException(
                    'Invalid long encoding: too many bytes'
                );
            }

            $n |= (($b & 0x7F) << $shift);
            $shift += 7;

            if (($b & 0x80) === 0) {
                $terminated = true;
                break;
            }
        }

        if (!$terminated) {
            throw new \InvalidArgumentException(
                'Invalid long encoding: input is empty or truncated'
            );
        }

        // Undo zigzag: the lowest bit carries the sign, the rest the magnitude.
        return self::unsigned_right_shift($n, 1) ^ -($n & 1);
    }
}

/**
* Exceptions arising from incompatibility between
* reader and writer schemas.
Expand Down Expand Up @@ -304,18 +377,9 @@ static function double_to_long_bits($double)
* @return string long $n encoded as bytes
* @internal This relies on 64-bit PHP.
*/
static public function encode_long($n)
public static function encode_long($n): string
{
$n = (int) $n;
$n = ($n << 1) ^ ($n >> 63);
$str = '';
while (0 != ($n & ~0x7F))
{
$str .= chr(($n & 0x7F) | 0x80);
$n >>= 7;
}
$str .= chr($n);
return $str;
return Zigzag::encode_long($n);
}

/**
Expand Down Expand Up @@ -931,16 +995,7 @@ class AvroIOBinaryDecoder
*/
public static function decode_long_from_array($bytes)
{
$b = array_shift($bytes);
$n = $b & 0x7f;
$shift = 7;
while (0 != ($b & 0x80))
{
$b = array_shift($bytes);
$n |= (($b & 0x7f) << $shift);
$shift += 7;
}
return (($n >> 1) ^ -($n & 1));
return Zigzag::decode_long($bytes);
}

/**
Expand Down
45 changes: 43 additions & 2 deletions test/DatumIOTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,37 @@ function test_datum_round_trip($schema_json, $datum, $binary)
$this->assertEquals($datum, $read_datum);
}

/**
 * Checks Zigzag::unsigned_right_shift() against a precomputed result.
 *
 * @dataProvider zigzag_unsigned_right_shift_provider
 *
 * @param int $expected precomputed result of shifting $n right by $x with zero fill
 * @param int $n value to shift
 * @param int $x number of bit positions
 */
public function test_zigzag_unsigned_right_shift(int $expected, int $n, int $x) {
    // assertSame pins both type and value; the expected values are large
    // integers, so a loose comparison is not wanted here.
    $this->assertSame(
        $expected,
        Zigzag::unsigned_right_shift($n, $x),
        "unsigned_right_shift($n, $x)"
    );
}

/**
 * Datasets for test_zigzag_unsigned_right_shift, each laid out as
 * [expected, n, x]. Expected values assume 64-bit integers.
 *
 * The two "chain" groups feed each result back in as the next input with
 * a 7-bit shift. The second chain yields 536870911, the same value the
 * first chain reaches, so its remaining steps would repeat the first
 * chain's rows and are not listed again.
 *
 * @return array
 */
public static function zigzag_unsigned_right_shift_provider(): array {
    return [
        'negative by 2' => [4611686018427387902, -8, 2],
        'positive by 2' => [2, 8, 2],

        'chain from -2, step 1' => [144115188075855871, -2, 7],
        'chain from -2, step 2' => [1125899906842623, 144115188075855871, 7],
        'chain from -2, step 3' => [8796093022207, 1125899906842623, 7],
        'chain from -2, step 4' => [68719476735, 8796093022207, 7],
        'chain from -2, step 5' => [536870911, 68719476735, 7],
        'chain from -2, step 6' => [4194303, 536870911, 7],
        'chain from -2, step 7' => [32767, 4194303, 7],
        'chain from -2, step 8' => [255, 32767, 7],
        'chain from -2, step 9' => [1, 255, 7],

        'chain from -2147483648, step 1' => [144115188059078656, -2147483648, 7],
        'chain from -2147483648, step 2' => [1125899906711552, 144115188059078656, 7],
        'chain from -2147483648, step 3' => [8796093021184, 1125899906711552, 7],
        'chain from -2147483648, step 4' => [68719476728, 8796093021184, 7],
        'chain from -2147483648, step 5' => [536870911, 68719476728, 7],

        // Boundary cases.
        'zero stays zero' => [0, 0, 7],
        'shift by zero returns the input' => [-8, -8, 0],
        '-1 by 1 is PHP_INT_MAX' => [PHP_INT_MAX, -1, 1],
        '-1 by 63 leaves the top bit' => [1, -1, 63],
        'PHP_INT_MIN by 63 leaves the top bit' => [1, PHP_INT_MIN, 63],
    ];
}

/**
* @return array
*/
Expand All @@ -67,11 +98,21 @@ function data_provider()
array('"int"', 1, "\002"),
array('"int"', 2147483647, "\xFE\xFF\xFF\xFF\x0F"),

// array('"long"', (int) -9223372036854775808, "\001"),
array('"long"', (int) -9223372036854775808, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
array('"long"', -(1<<62), "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"),
array('"long"', -4294967295, "\xFD\xFF\xFF\xFF\x1F"),
array('"long"', -10, "\x13"),
array('"long"', -3, "\005"),
array('"long"', -2, "\003"),
array('"long"', -1, "\001"),
array('"long"', 0, "\000"),
array('"long"', 1, "\002"),
// array('"long"', 9223372036854775807, "\002")
array('"long"', 2, "\004"),
array('"long"', 3, "\006"),
array('"long"', 10, "\x14"),
array('"long"', 4294967295, "\xFE\xFF\xFF\xFF\x1F"),
array('"long"', 1<<62, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01"),
array('"long"', 9223372036854775807, "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),

array('"float"', (float) -10.0, "\000\000 \301"),
array('"float"', (float) -1.0, "\000\000\200\277"),
Expand Down

0 comments on commit 2de9b2f

Please sign in to comment.