Skip to content
This repository has been archived by the owner on Jan 11, 2020. It is now read-only.

Fix strlen, implement strnlen and stdint.h, add strlen/strnlen tests #12

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/include/stdint.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,43 @@
*
* @see http://libc11.org/stdint/
*/
/* Exact-width integer types. */
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
#ifdef __LP64__
/* LP64 data model: int is used for 32 bits, long for 64 bits. */
typedef int int32_t;
typedef unsigned int uint32_t;
typedef long int64_t;
typedef unsigned long uint64_t;
#else
/* Otherwise long is used for 32 bits and long long for 64 bits.
 * NOTE(review): assumes a 32-bit long whenever __LP64__ is undefined. */
typedef long int32_t;
typedef unsigned long uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
#endif

/* Greatest-width integer types. */
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;

/* Minimum-width integer types; each maps to the exact-width type. */
typedef int8_t int_least8_t;
typedef uint8_t uint_least8_t;
typedef int16_t int_least16_t;
typedef uint16_t uint_least16_t;
typedef int32_t int_least32_t;
typedef uint32_t uint_least32_t;
typedef int64_t int_least64_t;
typedef uint64_t uint_least64_t; /* was missing; required alongside int_least64_t */

/* Fastest minimum-width integer types; each maps to the exact-width type. */
typedef int8_t int_fast8_t;
typedef uint8_t uint_fast8_t;
typedef int16_t int_fast16_t;
typedef uint16_t uint_fast16_t;
typedef int32_t int_fast32_t;
typedef uint32_t uint_fast32_t;
typedef int64_t int_fast64_t;
typedef uint64_t uint_fast64_t;

/* Integer types capable of holding object pointers.
 * NOTE(review): assumes pointers are no wider than long (not true on LLP64). */
typedef unsigned long uintptr_t;
typedef signed long intptr_t;

#endif /* _STDINT_H */
59 changes: 55 additions & 4 deletions src/string/strlen.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,35 @@
#endif

#include <string.h> /* for size_t, strlen() */
#include <stdint.h> /* for uintptr_t */

/* The cast forces the constants to the correct size, even if size_t is 32-bit.
* This checks for zeroes 4 or 8 bytes at a time. Subtracting 1 from every byte
* makes each zero byte underflow to 0xFF...
* 0x00 46 F3 01
* - 0x01 01 01 01
* -------------------
* 0xFF 45 F2 00
* ...and masking with the inverse of the original value cancels out bytes
* which already had the high bit set before the subtraction, such as 0xF3,
* which still has the high bit set afterwards (0xF2):
* 0xFF 45 F2 00
* & 0xFF B9 0C FE
* -------------------
* 0xFF 01 00 00
* and then masking by 0x80 keeps only the per-byte flag bit:
* 0xFF 01 00 00
* & 0x80 80 80 80
* -------------------
* 0x80 00 00 00
* The result is non-zero if and only if the word contains a zero byte.
* Only the lowest flagged byte is guaranteed to be a real zero: the borrow
* out of a zero byte can also flag a 0x01 byte directly above it, so the
* individual bytes are still checked to find the exact position.
* http://graphics.stanford.edu/~seander/bithacks.html */
/* Per-byte constants, truncated to the width of size_t by the cast.
 * Fully parenthesized so they expand safely inside any expression. */
#define ONES ((size_t)0x0101010101010101ULL)
#define HIGH_BITS ((size_t)0x8080808080808080ULL)
/* Non-zero if and only if some byte of the size_t value `v` is zero.
 * Evaluates `v` twice, so pass an expression without side effects. */
#define haszero(v) (((v) - ONES) & ~(v) & HIGH_BITS)

/**
 * Computes the length of the NUL-terminated string `s`.
 *
 * The string is scanned byte by byte until the pointer is aligned to
 * sizeof(size_t), then one size_t word at a time until a word containing
 * a zero byte is found, and finally byte by byte inside that word.
 *
 * @date 2013-06-11
 * @param s pointer to a NUL-terminated string
 * @return the number of bytes that precede the terminating NUL
 */
size_t
strlen(const char* const s) {
  const char* p = s;

  /* Avoid unaligned access. An aligned word never straddles a page boundary,
   * so loading the whole word that holds the terminator cannot fault. */
  while (((uintptr_t)p & (sizeof(size_t) - 1)) != 0) {
    if (*p == '\0') {
      return (size_t)(p - s);
    }
    p++;
  }

  /* Skip every word that contains no zero byte. */
  const size_t* word = (const size_t*)p;
  while (!haszero(*word)) {
    word++;
  }

  /* haszero() is exact, so the terminator lies within this word; it only
   * cannot tell which byte it is, hence the final byte-wise scan. */
  p = (const char*)word;
  while (*p != '\0') {
    p++;
  }
  return (size_t)(p - s);
}
66 changes: 62 additions & 4 deletions src/string/strnlen.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,73 @@
#endif

#include <string.h> /* for size_t, strnlen() */
#include <stdint.h> /* for uintptr_t */

/* The cast forces the constants to the correct size, even if size_t is 32-bit.
* This checks for zeroes 4 or 8 bytes at a time. Subtracting 1 from every byte
* makes each zero byte underflow to 0xFF...
* 0x00 46 F3 01
* - 0x01 01 01 01
* -------------------
* 0xFF 45 F2 00
* ...and masking with the inverse of the original value cancels out bytes
* which already had the high bit set before the subtraction, such as 0xF3,
* which still has the high bit set afterwards (0xF2):
* 0xFF 45 F2 00
* & 0xFF B9 0C FE
* -------------------
* 0xFF 01 00 00
* and then masking by 0x80 keeps only the per-byte flag bit:
* 0xFF 01 00 00
* & 0x80 80 80 80
* -------------------
* 0x80 00 00 00
* The result is non-zero if and only if the word contains a zero byte.
* Only the lowest flagged byte is guaranteed to be a real zero: the borrow
* out of a zero byte can also flag a 0x01 byte directly above it, so the
* individual bytes are still checked to find the exact position.
* http://graphics.stanford.edu/~seander/bithacks.html */
/* Per-byte constants, truncated to the width of size_t by the cast. */
#define ONES ((size_t)0x0101010101010101ULL)
#define HIGH_BITS ((size_t)0x8080808080808080ULL)
/* Non-zero if and only if some byte of the size_t value `v` is zero.
 * Evaluates `v` twice, so pass an expression without side effects. */
#define haszero(v) (((v) - ONES) & ~(v) & HIGH_BITS)

/**
 * Computes the length of the string `s`, examining at most `size` bytes.
 *
 * The limit is tracked as a count of remaining bytes rather than as an end
 * pointer: `s + size` can wrap around for large limits such as SIZE_MAX,
 * which made the function report `size` instead of the real length. No byte
 * at or beyond `s + size` is read, and `s` is never dereferenced (nor used
 * in pointer arithmetic) when `size` is 0.
 *
 * @date 2013-06-11
 * @author Arto Bendiken
 * @see http://libc11.org/string/strnlen.html
 * @param s pointer to the first byte to examine
 * @param size maximum number of bytes to examine
 * @return the number of bytes before the first NUL, or `size` if none of
 *         the first `size` bytes is NUL
 */
size_t
strnlen(const char* const s, size_t size) {
  const char* p = s;
  size_t remaining = size;

  /* Byte-wise until aligned, so the word loads below are aligned. */
  while (remaining != 0 && ((uintptr_t)p & (sizeof(size_t) - 1)) != 0) {
    if (*p == '\0') {
      return (size_t)(p - s);
    }
    p++;
    remaining--;
  }

  /* Whole words that lie entirely within the limit. */
  while (remaining >= sizeof(size_t)) {
    const size_t value = *(const size_t*)p;
    if (haszero(value)) {
      break; /* the terminator is in this word; locate it below */
    }
    p += sizeof(size_t);
    remaining -= sizeof(size_t);
  }

  /* Either the word holding the terminator, or the sub-word tail. */
  while (remaining != 0 && *p != '\0') {
    p++;
    remaining--;
  }
  return (size_t)(p - s);
}
18 changes: 16 additions & 2 deletions test/string/check-strlen.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@

/**
 * Runs the strlen() checks.
 *
 * @return EXIT_SUCCESS when every check passes, EXIT_FAILURE as soon as
 *         one of them does not
 */
int
main(void) {
/* Returns EXIT_FAILURE from main() when `expr` does not equal `expected`.
 * Wrapped in do/while(0) so that it behaves like a single statement. */
#define TEST(expr, expected) \
  do { \
    if ((expr) != (expected)) { \
      /* TODO: printf("TEST \"%s\" failed: expected %d, got %zu\n", #expr, (expected), (expr)); */ \
      return EXIT_FAILURE; \
    } \
  } while (0)

  TEST(strlen(""), 0);
  TEST(strlen("HELLO"), 5);
  /* an embedded NUL ends the string early */
  TEST(strlen("HE\0LLO"), 2);
  TEST(strlen("HELLOHELLO#@#$tE9c=\001"), 20);
  /* bytes with the high bit set must not be mistaken for a terminator */
  TEST(strlen("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"), 10);
  const char *long_string = "This is a long string";
  TEST(strlen(long_string), 21);
  // check unaligned
  TEST(strlen(long_string + 1), 20);

#undef TEST
  return EXIT_SUCCESS;
}
21 changes: 19 additions & 2 deletions test/string/check-strnlen.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,26 @@
#include <stdlib.h> /* for EXIT_SUCCESS */

#include <string.h> /* for mem*(), str*() */

/**
 * Runs the strnlen() checks.
 *
 * @return EXIT_SUCCESS when every check passes, EXIT_FAILURE as soon as
 *         one of them does not
 */
int
main(void) {
/* Returns EXIT_FAILURE from main() when `expr` does not equal `expected`.
 * Wrapped in do/while(0) so that it behaves like a single statement. */
#define TEST(expr, expected) \
  do { \
    if ((expr) != (expected)) { \
      /* TODO: printf("TEST \"%s\" failed: expected %d, got %zu\n", #expr, (expected), (expr));*/ \
      return EXIT_FAILURE; \
    } \
  } while (0)

  /* a zero limit must not touch the pointer at all */
  TEST(strnlen(NULL, 0), 0);
  TEST(strnlen("", 10), 0);
  TEST(strnlen("HELLO", 7), 5);
  TEST(strnlen("HELLO", 2), 2);
  TEST(strnlen("HELLO", 0), 0);
  /* an embedded NUL ends the string early */
  TEST(strnlen("HE\0LLO", 5), 2);
  TEST(strnlen("HELLOHELLO#@#$tE9c=\001", 19), 19);
  /* bytes with the high bit set must not be mistaken for a terminator */
  TEST(strnlen("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80", 11), 10);
  const char *long_string = "This is a long string";
  TEST(strnlen(long_string, 32), 21);
  // check unaligned
  TEST(strnlen(long_string + 1, 32), 20);

#undef TEST
  return EXIT_SUCCESS;
}