Skip to content

Commit

Permalink
Improve code-generation for Strings
Browse files Browse the repository at this point in the history
Make String::Data use a trivial copy of the short/long union to avoid
unnecessary branching there, and inline release() so that the compiler
can elide it on moved-from Strings.
  • Loading branch information
mawww committed Jul 20, 2021
1 parent c643cd4 commit a4dd89f
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 65 deletions.
10 changes: 5 additions & 5 deletions gdb/kakoune.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ def __init__(self, val):

def to_string(self):
data = self.val["m_data"]
if (data["s"]["size"] & 1) != 1:
ptr = data["l"]["ptr"]
len = data["l"]["size"]
if (data["u"]["s"]["size"] & 1) != 1:
ptr = data["u"]["l"]["ptr"]
len = data["u"]["l"]["size"]
else:
ptr = data["s"]["string"]
len = data["s"]["size"] >> 1
ptr = data["u"]["s"]["string"]
len = data["u"]["s"]["size"] >> 1
return "\"%s\"" % (ptr.string("utf-8", "ignore", len))


Expand Down
49 changes: 16 additions & 33 deletions src/string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,18 @@ String::Data::Data(const char* data, size_t size, size_t capacity)
++capacity;

kak_assert(capacity < Long::max_capacity);
l.ptr = Alloc{}.allocate(capacity+1);
l.size = size;
l.capacity = capacity;
u.l.ptr = Alloc{}.allocate(capacity+1);
u.l.size = size;
u.l.capacity = capacity;

if (data != nullptr)
memcpy(l.ptr, data, size);
l.ptr[size] = 0;
memcpy(u.l.ptr, data, size);
u.l.ptr[size] = 0;
}
else
set_short(data, size);
}

String::Data::Data(Data&& other) noexcept
{
if (other.is_long())
{
l = other.l;
other.set_empty();
}
else
s = other.s;
}

String::Data& String::Data::operator=(const Data& other)
{
if (&other == this)
Expand All @@ -59,11 +48,11 @@ String::Data& String::Data::operator=(Data&& other) noexcept

if (other.is_long())
{
l = other.l;
u.l = other.u.l;
other.set_empty();
}
else
s = other.s;
u.s = other.u.s;

return *this;
}
Expand All @@ -75,7 +64,7 @@ void String::Data::reserve(size_t new_capacity)
return;

if (is_long())
new_capacity = std::max(l.capacity * 2, new_capacity);
new_capacity = std::max(u.l.capacity * 2, new_capacity);

if (new_capacity & 1)
++new_capacity;
Expand All @@ -85,12 +74,12 @@ void String::Data::reserve(size_t new_capacity)
if (copy)
{
memcpy(new_ptr, data(), size()+1);
l.size = size();
u.l.size = size();
}
release();

l.ptr = new_ptr;
l.capacity = new_capacity;
u.l.ptr = new_ptr;
u.l.capacity = new_capacity;
}

template void String::Data::reserve<true>(size_t);
Expand Down Expand Up @@ -121,12 +110,6 @@ void String::Data::clear()
set_empty();
}

void String::Data::release()
{
if (is_long() and l.capacity != 0)
Alloc{}.deallocate(l.ptr, l.capacity+1);
}

void String::resize(ByteCount size, char c)
{
const size_t target_size = (size_t)size;
Expand All @@ -146,17 +129,17 @@ void String::resize(ByteCount size, char c)
void String::Data::set_size(size_t size)
{
if (is_long())
l.size = size;
u.l.size = size;
else
s.size = (size << 1) | 1;
u.s.size = (size << 1) | 1;
}

void String::Data::set_short(const char* data, size_t size)
{
s.size = (size << 1) | 1;
u.s.size = (size << 1) | 1;
if (data != nullptr)
memcpy(s.string, data, size);
s.string[size] = 0;
memcpy(u.s.string, data, size);
u.s.string[size] = 0;
}

const String String::ms_empty;
Expand Down
65 changes: 38 additions & 27 deletions src/string.hh
Original file line number Diff line number Diff line change
Expand Up @@ -156,45 +156,28 @@ public:
// capacity must be even; on little endian systems that means the allocated
// capacity cannot use its most significant byte, so we effectively limit
// capacity to 2^24 on 32bit arch, and 2^56 on 64.
union Data
struct Data
{
using Alloc = Allocator<char, MemoryDomain::String>;

struct Long
{
static constexpr size_t max_capacity =
(size_t)1 << 8 * (sizeof(size_t) - 1);

char* ptr;
size_t size;
size_t capacity;
} l;

struct Short
{
static constexpr size_t capacity = sizeof(Long) - 2;
char string[capacity+1];
unsigned char size;
} s;

Data() { set_empty(); }
Data(NoCopy, const char* data, size_t size) : l{const_cast<char*>(data), size, 0} {}
Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {}

Data(const char* data, size_t size, size_t capacity);
Data(const char* data, size_t size) : Data(data, size, size) {}
Data(const Data& other) : Data{other.data(), other.size()} {}

~Data() { release(); }
Data(Data&& other) noexcept;
Data(Data&& other) noexcept : u{other.u} { other.set_empty(); }
Data& operator=(const Data& other);
Data& operator=(Data&& other) noexcept;

bool is_long() const { return (s.size & 1) == 0; }
size_t size() const { return is_long() ? l.size : (s.size >> 1); }
size_t capacity() const { return is_long() ? l.capacity : Short::capacity; }
bool is_long() const { return (u.s.size & 1) == 0; }
size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); }
size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }

const char* data() const { return is_long() ? l.ptr : s.string; }
char* data() { return is_long() ? l.ptr : s.string; }
const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
char* data() { return is_long() ? u.l.ptr : u.s.string; }

template<bool copy = true>
void reserve(size_t new_capacity);
Expand All @@ -204,8 +187,36 @@ public:
void clear();

private:
void release();
void set_empty() { s.size = 1; s.string[0] = 0; }
// Representation used when the characters live behind a pointer rather than
// inside the Data object itself.
struct Long
{
// Exclusive upper bound that the allocating constructor asserts on the
// capacity: 1 << 8 * (sizeof(size_t) - 1), so an accepted capacity always
// has its most significant byte equal to zero.
static constexpr size_t max_capacity =
(size_t)1 << 8 * (sizeof(size_t) - 1);

// Start of the character buffer.
char* ptr;
// Number of characters, not counting the terminating 0.
size_t size;
// Buffers allocated by Data hold capacity+1 bytes; the NoCopy constructor
// stores 0 here.
size_t capacity;
};

// Representation used when the characters are stored in place. Its members
// add up to sizeof(Long) bytes: capacity+1 chars plus one size byte.
struct Short
{
// Maximum number of characters that fit in place; the two remaining bytes
// of sizeof(Long) hold the terminating 0 and the size byte.
static constexpr size_t capacity = sizeof(Long) - 2;
char string[capacity+1];
// Encoded as (length << 1) | 1: the low bit set is what is_long() tests to
// recognise the short representation.
unsigned char size;
};

// Storage shared by the two representations; is_long() reads the low bit of
// u.s.size to decide which member the accessors use.
union
{
Long l;
Short s;
} u;

// Gives the long buffer back to the allocator, if there is one to give back.
// Nothing happens for short data, nor for long data whose capacity is 0
// (the value the NoCopy constructor stores). The members are left untouched,
// so callers are expected to overwrite or reset them afterwards.
void release()
{
if (is_long() and u.l.capacity != 0)
// capacity+1 matches the size passed to allocate() when the buffer was made
Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
}

// Switches to the short representation holding an empty string: a size byte
// of 1 encodes length 0 with the short flag set. Does not call release().
void set_empty() { u.s.size = 1; u.s.string[0] = 0; }
void set_short(const char* data, size_t size);
};

Expand Down

0 comments on commit a4dd89f

Please sign in to comment.