From eced863cb54a187803a42c8f968fb5d16dd5cf8c Mon Sep 17 00:00:00 2001 From: mooinglemur Date: Fri, 14 Jul 2023 21:42:27 -0700 Subject: [PATCH] HostFS: major update: case-insensitive access and wildcards, ISO <-> UTF-8 translation for filenames (#132) * HostFS: major update: case-insensitive access and wildcards, ISO <-> UTF-8 translation for filenames * remove this ugly check * fix static declarations * minor bug causing ? wildcards not to work in filenames with utf-8 --- src/glue.h | 4 +- src/ieee.c | 712 ++++++++++++++++++++++++++++++++++++----------------- src/main.c | 8 +- 3 files changed, 497 insertions(+), 227 deletions(-) diff --git a/src/glue.h b/src/glue.h index 006b1f4a..f404168c 100644 --- a/src/glue.h +++ b/src/glue.h @@ -67,8 +67,8 @@ extern bool save_on_exit; extern bool disable_emu_cmd_keys; extern gif_recorder_state_t record_gif; extern char *gif_path; -extern char *fsroot_path; -extern char *startin_path; +extern uint8_t *fsroot_path; +extern uint8_t *startin_path; extern uint8_t keymap; extern bool warp_mode; extern bool testbench; diff --git a/src/ieee.c b/src/ieee.c index 8c1d7983..56634c85 100644 --- a/src/ieee.c +++ b/src/ieee.c @@ -47,10 +47,10 @@ bool log_ieee = false; bool ieee_initialized_once = false; -char error[80]; +uint8_t error[80]; int error_len = 0; int error_pos = 0; -char cmd[80]; +uint8_t cmd[80]; int cmdlen = 0; int namelen = 0; int channel = 0; @@ -61,7 +61,7 @@ bool overwrite = false; bool path_exists = false; bool prg_consumed = false; -char *hostfscwd = NULL; +uint8_t *hostfscwd = NULL; uint8_t dirlist[1024]; // Plenty large to hold a single entry int dirlist_len = 0; @@ -71,15 +71,15 @@ bool dirlist_eof = true; bool dirlist_timestmaps = false; bool dirlist_long = false; DIR *dirlist_dirp; -char dirlist_wildcard[256]; -char dirlist_type_filter; +uint8_t dirlist_wildcard[256]; +uint8_t dirlist_type_filter; uint16_t cbdos_flags = 0; const char *blocks_free = "BLOCKS FREE."; typedef struct { - char name[80]; + uint8_t name[80]; bool read; bool write; SDL_RWops *f; @@ -103,17 +103,28 @@ realpath(const char *path, char *resolved_path) { } #endif +#define u8strchr(A,B) (uint8_t *)strchr((char *)A,B) +#define u8strrchr(A,B) (uint8_t *)strrchr((char *)A,B) +#define u8strcpy(A,B) strcpy((char *)A,(char *)B) +#define u8strcmp(A,B) strcmp((char *)A,(char *)B) +#define u8strncmp(A,B,C) strncmp((char *)A,(char *)B,C) +#define u8strncpy(A,B,C) strncpy((char *)A,(char *)B,C) +#define u8strlen(A) strlen((char *)A) +#define u8stat(A,B) stat((char *)A,B) +#define u8realpath(A,B) (uint8_t *)realpath((char *)A,(char *)B) +#define u8getcwd(A,B) (uint8_t *)getcwd((char *)A,B) + // Prototypes for some of the static functions static void clear_error(); static void set_error(int e, int t, int s); -static void cchdir(char *dir); -static int cgetcwd(char *buf, size_t len); +static void cchdir(uint8_t *dir); +static int cgetcwd(uint8_t *buf, size_t len); static void cseek(int channel, uint32_t pos); -static void cmkdir(char *dir); -static void crmdir(char *dir); -static void cunlink(char *f); -static void crename(char *f); +static void cmkdir(uint8_t *dir); +static void crmdir(uint8_t *dir); +static void cunlink(uint8_t *f); +static void crename(uint8_t *f); // Functions @@ -135,20 +146,104 @@ get_kernal_cbdos_flags(void) } +static void +utf8_to_iso(uint8_t *dst, const uint8_t *src) +{ + int i = 0; + int j = 0; + + for (i = 0; src[i]; i++) { + // ASCII range is copied verbatim + if (src[i] < 0x80) { + dst[j++] = src[i]; + continue; + } + // Invalid first UTF-8 byte + if (src[i] < 0xc0) { + dst[j++] = '?'; + continue; + } + // Invalid singular UTF-8 byte + if (src[i+1] < 0x80) { + dst[j++] = '?'; + continue; + } + + uint32_t cp; + int remlen; + + if ((src[i] & 0xf8) == 0xf0) { // four byte + cp = src[i] & 0x07; + remlen = 3; + } else if ((src[i] & 0xf0) == 0xe0) { // three byte + cp = src[i] & 0x0f; + remlen = 2; + } else { // two byte + cp = src[i] & 0x1f; + remlen = 1; + } + + // construct the integer codepoint + for (; remlen > 0; remlen--) { + cp <<= 6; + cp |= (src[++i] & 0x3f); + } + + // Resolve the Unicode codepoints that are > 0xff but are in ISO-8859-15 + switch (cp) { + case 0x20ac: // € + cp = 0xa4; + break; + case 0x0160: // Š + cp = 0xa6; + break; + case 0x0161: // š + cp = 0xa8; + break; + case 0x017d: // Ž + cp = 0xb4; + break; + case 0x017e: // ž + cp = 0xb8; + break; + case 0x0152: // Œ + cp = 0xbc; + break; + case 0x0153: // œ + cp = 0xbd; + break; + case 0x0178: // Ÿ + cp = 0xbe; + break; + default: + // do nothing here + break; + } + + if (cp > 0xff) { + dst[j++] = '?'; + } else { + dst[j++] = cp; + } + } + + dst[j] = 0; +} + // Puts the emulated cwd in buf, up to the maximum length specified by len // Turn null termination into a space // This is for displaying in the directory header static int -cgetcwd(char *buf, size_t len) +cgetcwd(uint8_t *buf, size_t len) { int o = 0; - char l = fsroot_path[strlen(fsroot_path)-1]; + uint8_t l = fsroot_path[u8strlen(fsroot_path)-1]; if (l == '/' || l == '\\') o--; - if (strlen(fsroot_path) == strlen(hostfscwd)) - strncpy(buf, "/", len); + if (u8strlen(fsroot_path) == u8strlen(hostfscwd)) + u8strncpy(buf, "/", len); else - strncpy(buf, hostfscwd+strlen(fsroot_path)+o, len); + u8strncpy(buf, hostfscwd+u8strlen(fsroot_path)+o, len); // Turn backslashes into slashes for (o = 0; o < len; o++) { if (buf[o] == 0) buf[o] = ' '; @@ -158,16 +253,93 @@ cgetcwd(char *buf, size_t len) return 0; } -static char * -parse_dos_filename(const char *name) +static uint32_t +utf8_to_codepoint_i(uint8_t *str, int *off) +{ + // convert each UTF-8 sequence into a numeric codepoint + int remlen; + uint32_t cp; + + if ((str[*off] & 0xf8) == 0xf0) { // four byte + cp = str[*off] & 0x07; + remlen = 3; + } else if ((str[*off] & 0xf0) == 0xe0) { // three byte + cp = str[*off] & 0x0f; + remlen = 2; + } else if ((str[*off] & 0xe0) == 0xc0) { // two byte + cp = str[*off] & 0x1f; + remlen = 1; + } else { + cp = str[*off]; + remlen = 0; + } + + // construct the integer codepoint + for (; remlen > 0; remlen--) { + cp <<= 6; + cp |= (str[++*off] & 0x3f); + } + + // now case-fold it + // ASCII letters, and most of ISO letters + if ((cp >= 0x41 && cp <= 0x5a) || (cp >= 0xc0 && cp <= 0xfe)) cp += 0x20; + // fold the Š + else if (cp == 0x0160) cp = 0x0161; + // fold the Ž + else if (cp == 0x017d) cp = 0x017e; + // fold the Œ + else if (cp == 0x0152) cp = 0x0153; + // fold the Ÿ + else if (cp == 0x0178) cp = 0xff; + + return cp; +} + +static uint8_t +case_fold_iso(uint8_t c) { + uint8_t f = c; + if ((f >= 0x41 && f <= 0x5a) || (f >= 0xc0 && f <= 0xfe)) f += 0x20; + // fold the Š + else if (f == 0xa6) f = 0xa8; + // fold the Ž + else if (f == 0xb4) f = 0xb8; + // fold the Œ + else if (f == 0xbc) f = 0xbd; + // fold the Ÿ + else if (f == 0xbe) f = 0xff; + + return f; +} + +// compare two characters in a UTF-8 string +// in a case-insensitive manner, +// return value is cp2 - cp1 after case folding +// hence will be zero on match +static int +u8compare_utf8_char_i(uint8_t *str1, uint8_t *str2, int *off1, int *off2) +{ + uint32_t cp1, cp2; + + // these functions will increment *off1/*off2 to the + // byte before the next character if the UTF-8 + // strings are multi-byte + cp1 = utf8_to_codepoint_i(str1, off1); + cp2 = utf8_to_codepoint_i(str2, off2); + + return cp2 - cp1; +} + + +static uint8_t * +parse_dos_filename(const uint8_t *name) { // in case the name starts with something with special meaning, // such as @0: - char *name_ptr; - char *newname = malloc(strlen(name)+1); + uint8_t *name_ptr; + uint8_t *newname = malloc(u8strlen(name)+1); int i, j; - newname[strlen(name)] = 0; + newname[u8strlen(name)] = 0; overwrite = false; @@ -183,11 +355,11 @@ parse_dos_filename(const char *name) // This routine only parses the bits before the ':' // and normalizes directory parts by attaching them to the name part - // resolve_path() is responsible for resolving absolute and relative + // resolve_path*() is responsible for resolving absolute and relative // paths, and for processing the wildcard option - if ((name_ptr = strchr(name,':'))) { + if ((name_ptr = u8strchr(name,':'))) { name_ptr++; i = 0; j = 0; @@ -217,33 +389,38 @@ parse_dos_filename(const char *name) } } - strcpy(newname+j,name_ptr); + u8strcpy(newname+j,name_ptr); } else { - strcpy(newname,name); + u8strcpy(newname,name); } return newname; } // Returns a ptr to malloc()ed space which must be free()d later, or NULL -static char * -resolve_path(const char *name, bool must_exist, int wildcard_filetype) +static uint8_t * +resolve_path_utf8(const uint8_t *name, bool must_exist, int wildcard_filetype) { path_exists = false; clear_error(); // Resolve the filename in the context of the emulated cwd // allocate plenty of string space - char *tmp = malloc(strlen(name)+strlen(hostfscwd)+2); - char *c; - char *d; - char *ret; + uint8_t *tmp = malloc(u8strlen(name)+u8strlen(hostfscwd)+2); + uint8_t *tmp2 = malloc(u8strlen(name)+u8strlen(hostfscwd)+2); + uint8_t *c; + uint8_t *d; + uint8_t *ret; DIR *dirp; struct dirent *dp; struct stat st; int i; + int j; + bool has_wildcard_chars = false; - if (tmp == NULL) { + if (tmp == NULL || tmp2 == NULL) { + if (tmp) free(tmp); + if (tmp2) free(tmp2); set_error(0x70, 0, 0); return NULL; } @@ -252,118 +429,141 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) // slash(es) and all, otherwise append it to the cwd, but with a / // in between if (name[0] == '/' || name[0] == '\\') { // absolute - strcpy(tmp, fsroot_path); - strcpy(tmp+strlen(fsroot_path), name); + u8strcpy(tmp, fsroot_path); + u8strcpy(tmp+u8strlen(fsroot_path), name); } else { // relative - strcpy(tmp, hostfscwd); - tmp[strlen(hostfscwd)] = '/'; - strcpy(tmp+strlen(hostfscwd)+1, name); + u8strcpy(tmp, hostfscwd); + tmp[u8strlen(hostfscwd)] = '/'; + u8strcpy(tmp+u8strlen(hostfscwd)+1, name); } - if (strchr(tmp,'*') || strchr(tmp,'?')) { // oh goodie, a wildcard - // we have to search the directory for the first occurrence - // in directory order + // keep the original parsed name + u8strcpy(tmp2, tmp); - c = strrchr(tmp,'\\'); - d = strrchr(tmp,'/'); + if (u8strchr(tmp,'*') || u8strchr(tmp,'?')) { // oh goodie, a wildcard + has_wildcard_chars = true; + } + // We have to search the directory for the first occurrence + // in directory order. - if (c == NULL && d == NULL) { // This should never happen - free(tmp); - set_error(0x62, 0, 0); - return NULL; - } + c = u8strrchr(tmp,'\\'); + d = u8strrchr(tmp,'/'); - // Chop off ret at the last path separator - // and set c to point at the element with the wildcard - // pattern - if (c > d) { - *c = 0; - c++; - } else { - *d = 0; - c = d+1; - } + if (c == NULL && d == NULL) { // This should never happen + free(tmp); + free(tmp2); + set_error(0x62, 0, 0); + return NULL; + } - if (!(dirp = opendir(tmp))) { // Directory couldn't be opened - free(tmp); - set_error(0x62, 0, 0); - return NULL; - } + // Chop off ret at the last path separator + // and set c to point at the element with the wildcard + // pattern + if (c > d) { + *c = 0; + c++; + } else { + *d = 0; + c = d+1; + } - ret = NULL; + // XXX + // + // tmp could have been populated with path parts that have not been checked + // this will fail on case-sensitive filesystems + // but if the user did not specify a path part in their filename + // we should be able to resolve the new part of the name + // we'll probably want to fix this at some point + // but it requires some thought - bool found = false; + if (!(dirp = opendir((char *)tmp))) { // Directory couldn't be opened + free(tmp); + free(tmp2); + set_error(0x62, 0, 0); + return NULL; + } - while ((dp = readdir(dirp))) { - // in a wildcard match that starts at first position, leading dot filenames are not considered - if ((*c == '*' || *c == '?') && *(dp->d_name) == '.') - continue; - for (i = 0; i < strlen(c) && i < strlen(dp->d_name); i++) { - if (c[i] == '*') { - found = true; - break; - } else if (c[i] == '?') { - continue; - } else if (c[i] != (dp->d_name)[i]) { - break; - } - } + ret = NULL; - // If we reach the end of both strings, it's a match - if (i == strlen(dp->d_name) && i == strlen(c)) - found = true; + bool found = false; - // If we reach the end of the filename, but the next char - // in the search string is *, then it's also a match - else if (i == strlen(dp->d_name) && c[i] == '*') + while ((dp = readdir(dirp))) { + // in a wildcard match that starts at first position, leading dot filenames are not considered + if ((*c == '*' || *c == '?') && *(dp->d_name) == '.') + continue; + for (i = 0, j = 0; j < u8strlen(c) && i < u8strlen(dp->d_name); i++, j++) { + if (c[j] == '*') { found = true; - - if (found) { // simple wildcard match - ret = malloc(strlen(tmp)+strlen(dp->d_name)+2); - if (ret == NULL) { // memory allocation error - free(tmp); - closedir(dirp); - set_error(0x70, 0, 0); - return NULL; - } - strcpy(ret, tmp); - ret[strlen(tmp)] = '/'; - strcpy(ret+strlen(tmp)+1, dp->d_name); - if (wildcard_filetype) { - stat(ret, &st); - // in a wildcard match where the filetype is wrong, mark as not found - // and continue - if (wildcard_filetype == WILDCARD_DIR && !S_ISDIR(st.st_mode)) { - free(ret); - ret = NULL; - found = false; - continue; - } else if (wildcard_filetype == WILDCARD_PRG && !S_ISREG(st.st_mode)) { - free(ret); - ret = NULL; - found = false; - continue; - } - } + break; + } else if (c[j] == '?') { + // '?' needs to eat the extra UTF-8 bytes if applicable + if (((dp->d_name)[i] & 0xe0) == 0xc0) i++; + else if (((dp->d_name)[i] & 0xf0) == 0xe0) i+=2; + else if (((dp->d_name)[i] & 0xf8) == 0xf0) i+=3; + continue; + } else if (u8compare_utf8_char_i(c, (uint8_t *)(dp->d_name), &j, &i)) { break; } } - closedir(dirp); - free(tmp); + // If we reach the end of both strings, it's a match + if (i == u8strlen(dp->d_name) && j == u8strlen(c)) + found = true; - if (!ret) { // No wildcard match - set_error(0x62, 0, 0); - return NULL; + // If we reach the end of the filename, but the next char + // in the search string is *, then it's also a match + else if (i == u8strlen(dp->d_name) && c[j] == '*') + found = true; + + if (found) { // simple wildcard match + ret = malloc(u8strlen(tmp)+u8strlen(dp->d_name)+2); + if (ret == NULL) { // memory allocation error + free(tmp); + free(tmp2); + closedir(dirp); + set_error(0x70, 0, 0); + return NULL; + } + u8strcpy(ret, tmp); + ret[u8strlen(tmp)] = '/'; + u8strcpy(ret+u8strlen(tmp)+1, dp->d_name); + if (wildcard_filetype) { + u8stat(ret, &st); + // in a wildcard match where the filetype is wrong, mark as not found + // and continue + if (wildcard_filetype == WILDCARD_DIR && !S_ISDIR(st.st_mode)) { + free(ret); + ret = NULL; + found = false; + continue; + } else if (wildcard_filetype == WILDCARD_PRG && !S_ISREG(st.st_mode)) { + free(ret); + ret = NULL; + found = false; + continue; + } + } + break; } + } + closedir(dirp); - tmp = ret; // section after this expects the topic to be in tmp + free(tmp); + if (ret) { // We had a match + free(tmp2); + tmp = ret; + } else if (has_wildcard_chars) { + // our original query had * or ? and we didn't find it + free(tmp2); + set_error(0x62, 0, 0); + return NULL; + } else { // reset tmp to the original name + tmp = tmp2; } - // now resolve the path using OS routines - ret = realpath(tmp, NULL); + ret = u8realpath(tmp, NULL); free(tmp); if (ret == NULL) { @@ -373,20 +573,20 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) } else { // path does not exist, but as long as everything but the final // path element exists, we're still okay. - tmp = malloc(strlen(name)+1); + tmp = malloc(u8strlen(name)+1); if (tmp == NULL) { set_error(0x70, 0, 0); return NULL; } - strcpy(tmp, name); - c = strrchr(tmp, '/'); + u8strcpy(tmp, name); + c = u8strrchr(tmp, '/'); if (c == NULL) - c = strrchr(tmp, '\\'); + c = u8strrchr(tmp, '\\'); if (c != NULL) *c = 0; // truncate string here // assemble a path with what we have left - ret = malloc(strlen(tmp)+strlen(hostfscwd)+2); + ret = malloc(u8strlen(tmp)+u8strlen(hostfscwd)+2); if (ret == NULL) { free(tmp); set_error(0x70, 0, 0); @@ -394,12 +594,12 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) } if (name[0] == '/' || name[0] == '\\') { // absolute - strcpy(ret, fsroot_path); - strcpy(ret+strlen(fsroot_path), tmp); + u8strcpy(ret, fsroot_path); + u8strcpy(ret+u8strlen(fsroot_path), tmp); } else { // relative - strcpy(ret, hostfscwd); - *(ret+strlen(hostfscwd)) = '/'; - strcpy(ret+strlen(hostfscwd)+1, tmp); + u8strcpy(ret, hostfscwd); + *(ret+u8strlen(hostfscwd)) = '/'; + u8strcpy(ret+u8strlen(hostfscwd)+1, tmp); } free(tmp); @@ -407,7 +607,7 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) // if we found a path separator in the name string // we check everything up to that final separator if (c != NULL) { - tmp = realpath(ret, NULL); + tmp = u8realpath(ret, NULL); free(ret); if (tmp == NULL) { // missing parent path element too @@ -417,14 +617,14 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) free(tmp); // found everything up to the parent path element // restore ret to original case - ret = malloc(strlen(name)+strlen(hostfscwd)+2); + ret = malloc(u8strlen(name)+u8strlen(hostfscwd)+2); if (ret == NULL) { set_error(0x70, 0, 0); return NULL; } - strcpy(ret, hostfscwd); - ret[strlen(hostfscwd)] = '/'; - strcpy(ret+strlen(hostfscwd)+1, name); + u8strcpy(ret, hostfscwd); + ret[u8strlen(hostfscwd)] = '/'; + u8strcpy(ret+u8strlen(hostfscwd)+1, name); } } } @@ -437,19 +637,19 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) // Prevent resolving outside the fsroot_path - if (strlen(fsroot_path) > strlen(ret)) { + if (u8strlen(fsroot_path) > u8strlen(ret)) { free(ret); set_error(0x62, 0, 0); return NULL; - } else if (strncmp(fsroot_path, ret, strlen(fsroot_path))) { + } else if (u8strncmp(fsroot_path, ret, u8strlen(fsroot_path))) { free(ret); set_error(0x62, 0, 0); return NULL; - } else if (strlen(fsroot_path) < strlen(ret) && - fsroot_path[strlen(fsroot_path)-1] != '/' && - fsroot_path[strlen(fsroot_path)-1] != '\\' && - ret[strlen(fsroot_path)] != '/' && - ret[strlen(fsroot_path)] != '\\') + } else if (u8strlen(fsroot_path) < u8strlen(ret) && + fsroot_path[u8strlen(fsroot_path)-1] != '/' && + fsroot_path[u8strlen(fsroot_path)-1] != '\\' && + ret[u8strlen(fsroot_path)] != '/' && + ret[u8strlen(fsroot_path)] != '\\') { // ret matches beginning of fsroot_path, // end of fsroot_path is not a path-separator, @@ -467,9 +667,73 @@ resolve_path(const char *name, bool must_exist, int wildcard_filetype) } +static uint8_t * +resolve_path_iso(const uint8_t *name, bool must_exist, int wildcard_filetype) +{ + uint8_t *ret; + uint8_t *buf; + int i; + int j = 0; + + buf = malloc(u8strlen(name)*3+1); + for (i = 0; name[i]; i++) { + // 0x00-0x7f is verbatim + if (name[i] < 0x80) { + buf[j++] = name[i]; + continue; + } + + // Resolve the Unicode codepoints + switch (name[i]) { + case 0xa4: // € + buf[j++] = 0xe2; + buf[j++] = 0x82; + buf[j++] = 0xac; + break; + case 0xa6: // Š + buf[j++] = 0xc5; + buf[j++] = 0xa0; + break; + case 0xa8: // š + buf[j++] = 0xc5; + buf[j++] = 0xa1; + break; + case 0xb4: // Ž + buf[j++] = 0xc5; + buf[j++] = 0xbd; + break; + case 0xb8: // ž + buf[j++] = 0xc5; + buf[j++] = 0xbe; + break; + case 0xbc: // Œ + buf[j++] = 0xc5; + buf[j++] = 0x92; + break; + case 0xbd: // œ + buf[j++] = 0xc5; + buf[j++] = 0x93; + break; + case 0xbe: // Ÿ + buf[j++] = 0xc5; + buf[j++] = 0xb8; + break; + default: + buf[j++] = (0xc0 | (name[i] >> 6)); + buf[j++] = (0x80 | (name[i] & 0x3f)); + break; + } + } + + buf[j] = 0; + + ret = resolve_path_utf8(buf, must_exist, wildcard_filetype); + free(buf); + return ret; +} static int -create_directory_listing(uint8_t *data, char *dirstring) +create_directory_listing(uint8_t *data, uint8_t *dirstring) { uint8_t *data_start = data; @@ -487,11 +751,11 @@ create_directory_listing(uint8_t *data, char *dirstring) // Here's where we parse out directory listing options // Such as "$=T:MATCH*=P" - while (i < strlen(dirstring)) { + while (i < u8strlen(dirstring)) { if (dirstring[i] == ':' || i == 0) { i++; j = 0; - while (i < strlen(dirstring)) { + while (i < u8strlen(dirstring)) { if (i == 1 && dirstring[i] == ':') i++; if (dirstring[i] == '=' || dirstring[i] == 0) { @@ -529,7 +793,7 @@ create_directory_listing(uint8_t *data, char *dirstring) for (int i = 0; i < 16; i++) { *data++ = ' '; } - if (cgetcwd((char *)data - 16, 16)) { + if (cgetcwd(data - 16, 16)) { return false; } *data++ = '"'; @@ -541,7 +805,7 @@ create_directory_listing(uint8_t *data, char *dirstring) *data++ = ' '; *data++ = 0; - if (!(dirlist_dirp = opendir(hostfscwd))) { + if (!(dirlist_dirp = opendir((char *)hostfscwd))) { return 0; } dirlist_eof = false; @@ -555,15 +819,15 @@ continue_directory_listing(uint8_t *data) struct stat st; struct dirent *dp; size_t file_size; - char *tmpnam; + uint8_t *tmpnam; bool found; int i; while ((dp = readdir(dirlist_dirp))) { - size_t namlen = strlen(dp->d_name); - tmpnam = resolve_path(dp->d_name, true, WILDCARD_ALL); + size_t namlen = u8strlen(dp->d_name); + tmpnam = resolve_path_utf8((uint8_t *)dp->d_name, true, WILDCARD_ALL); if (tmpnam == NULL) continue; - stat(tmpnam, &st); + u8stat(tmpnam, &st); free(tmpnam); // Type match @@ -582,36 +846,39 @@ continue_directory_listing(uint8_t *data) // don't show the . or .. in the root directory // this behaves like SD card/FAT32 - if (!strcmp("..",dp->d_name) || !strcmp(".",dp->d_name)) { - if (!strcmp(hostfscwd,fsroot_path)) { + if (!u8strcmp("..",dp->d_name) || !u8strcmp(".",dp->d_name)) { + if (!u8strcmp(hostfscwd,fsroot_path)) { continue; } } + tmpnam = malloc(namlen+1); + utf8_to_iso(tmpnam, (uint8_t *)dp->d_name); + if (dirlist_wildcard[0]) { // wildcard match selected // in a wildcard match that starts at first position, leading dot filenames are not considered - if ((dirlist_wildcard[0] == '*' || dirlist_wildcard[0] == '?') && *(dp->d_name) == '.') + if ((dirlist_wildcard[0] == '*' || dirlist_wildcard[0] == '?') && *(tmpnam) == '.') continue; found = false; - for (i = 0; i < strlen(dirlist_wildcard) && i < strlen(dp->d_name); i++) { + for (i = 0; i < u8strlen(dirlist_wildcard) && i < u8strlen(tmpnam); i++) { if (dirlist_wildcard[i] == '*') { found = true; break; } else if (dirlist_wildcard[i] == '?') { continue; - } else if (dirlist_wildcard[i] != (dp->d_name)[i]) { + } else if (case_fold_iso(dirlist_wildcard[i]) != case_fold_iso((tmpnam)[i])) { break; } } // If we reach the end of both strings, it's a match - if (i == strlen(dp->d_name) && i == strlen(dirlist_wildcard)) + if (i == u8strlen(tmpnam) && i == u8strlen(dirlist_wildcard)) found = true; // If we reach the end of the filename and the following character in // the wildcard string is *, it's also a match - else if (i == strlen(dp->d_name) && dirlist_wildcard[i] == '*') + else if (i == u8strlen(tmpnam) && dirlist_wildcard[i] == '*') found = true; if (!found) continue; @@ -662,7 +929,8 @@ continue_directory_listing(uint8_t *data) //if (namlen > 16) { // namlen = 16; // TODO hack //} - memcpy(data, dp->d_name, namlen); + namlen = u8strlen(tmpnam); + memcpy(data, tmpnam, namlen); data += namlen; *data++ = '"'; for (int i = namlen; i < 16; i++) { @@ -705,6 +973,8 @@ continue_directory_listing(uint8_t *data) data += sprintf((char *)data, "%02X %08X ", attrbyte, (unsigned int)fullsize); } + free(tmpnam); + *data++ = 0; return data - data_start; } @@ -716,8 +986,8 @@ continue_directory_listing(uint8_t *data) *data++ = 255; // "65535" *data++ = 255; - memcpy(data, blocks_free, strlen(blocks_free)); - data += strlen(blocks_free); + memcpy(data, blocks_free, u8strlen(blocks_free)); + data += u8strlen(blocks_free); *data++ = 0; // link @@ -749,7 +1019,7 @@ create_cwd_listing(uint8_t *data) for (int i = 0; i < 16; i++) { *data++ = ' '; } - if (cgetcwd((char *)data - 16, 16)) { + if (cgetcwd(data - 16, 16)) { dirlist_eof = true; return 0; } @@ -762,14 +1032,14 @@ create_cwd_listing(uint8_t *data) *data++ = ' '; *data++ = 0; - char *tmp = malloc(strlen(hostfscwd)+1); + uint8_t *tmp = malloc(u8strlen(hostfscwd)+1); if (tmp == NULL) { set_error(0x70, 0, 0); return 0; } - int i = strlen(hostfscwd); - int j = strlen(fsroot_path); - strcpy(tmp,hostfscwd); + int i = u8strlen(hostfscwd); + int j = u8strlen(fsroot_path); + u8strcpy(tmp,hostfscwd); for(; i>= j-1; --i) { // find the beginning of a path element @@ -779,11 +1049,11 @@ create_cwd_listing(uint8_t *data) tmp[i-1]=0; if (i < j) { - strcpy(tmp+i,"/"); + u8strcpy(tmp+i,"/"); } file_size = 0; - size_t namlen = strlen(tmp+i); + size_t namlen = u8strlen(tmp+i); if (!namlen) continue; // there was a doubled path separator @@ -826,8 +1096,8 @@ create_cwd_listing(uint8_t *data) *data++ = 255; // "65535" *data++ = 255; - memcpy(data, blocks_free, strlen(blocks_free)); - data += strlen(blocks_free); + memcpy(data, blocks_free, u8strlen(blocks_free)); + data += u8strlen(blocks_free); *data++ = 0; // link @@ -915,8 +1185,8 @@ set_activity(bool active) static void set_error(int e, int t, int s) { - snprintf(error, sizeof(error), "%02x,%s,%02d,%02d\r", e, error_string(e), t, s); - error_len = strlen(error); + snprintf((char *)error, sizeof(error), "%02x,%s,%02d,%02d\r", e, error_string(e), t, s); + error_len = u8strlen(error); error_pos = 0; uint8_t cbdos_flags = get_kernal_cbdos_flags(); if (e < 0x10 || e == 0x73) { @@ -935,7 +1205,7 @@ clear_error() static void -command(char *cmd) +command(uint8_t *cmd) { if (!cmd[0]) { return; @@ -1019,20 +1289,20 @@ command(char *cmd) } static void -cchdir(char *dir) +cchdir(uint8_t *dir) { // The directory name is in dir, coming from the command channel // with the CD: portion stripped off - char *resolved; + uint8_t *resolved; struct stat st; - if ((resolved = resolve_path(dir, true, WILDCARD_DIR)) == NULL) { + if ((resolved = resolve_path_iso(dir, true, WILDCARD_DIR)) == NULL) { // error already set return; } // Is it a directory? - if (stat(resolved, &st)) { + if (u8stat(resolved, &st)) { // FNF free(resolved); set_error(0x62, 0, 0); @@ -1050,21 +1320,21 @@ cchdir(char *dir) } static void -cmkdir(char *dir) +cmkdir(uint8_t *dir) { // The directory name is in dir, coming from the command channel // with the MD: portion stripped off - char *resolved; + uint8_t *resolved; clear_error(); - if ((resolved = resolve_path(dir, false, WILDCARD_DIR)) == NULL) { + if ((resolved = resolve_path_iso(dir, false, WILDCARD_DIR)) == NULL) { // error already set return; } #ifdef __MINGW32__ - if (_mkdir(resolved)) + if (_mkdir((char *)resolved)) #else - if (mkdir(resolved,0777)) + if (mkdir((char *)resolved,0777)) #endif { if (errno == EEXIST) { @@ -1080,18 +1350,18 @@ cmkdir(char *dir) } static void -crename(char *f) +crename(uint8_t *f) { // This function receives the whole R command, which could be // "R:NEW=OLD" or "RENAME:NEW=OLD" or anything in between // let's simply find the first colon and chop it there - char *tmp = malloc(strlen(f)+1); + uint8_t *tmp = malloc(u8strlen(f)+1); if (tmp == NULL) { set_error(0x70, 0, 0); return; } - strcpy(tmp,f); - char *d = strchr(tmp,':'); + u8strcpy(tmp,f); + uint8_t *d = u8strchr(tmp,':'); if (d == NULL) { // No colon, not a valid rename command @@ -1103,7 +1373,7 @@ crename(char *f) d++; // one character after the colon // Now split on the = sign to find - char *s = strchr(d,'='); + uint8_t *s = u8strchr(d,'='); if (s == NULL) { // No equals sign, not a valid rename command @@ -1114,18 +1384,18 @@ crename(char *f) *(s++) = 0; // null-terminate d and advance s - char *src; - char *dst; + uint8_t *src; + uint8_t *dst; clear_error(); - if ((src = resolve_path(s, true, WILDCARD_ALL)) == NULL) { + if ((src = resolve_path_iso(s, true, WILDCARD_ALL)) == NULL) { // source not found free(tmp); set_error(0x62, 0, 0); return; } - if ((dst = resolve_path(d, false, WILDCARD_ALL)) == NULL) { + if ((dst = resolve_path_iso(d, false, WILDCARD_ALL)) == NULL) { // dest not found free(tmp); free(src); @@ -1135,7 +1405,7 @@ crename(char *f) free(tmp); // we're now done with d and s (part of tmp) - if (rename(src, dst)) { + if (rename((char *)src, (char *)dst)) { if (errno == EACCES) { set_error(0x63, 0, 0); } else if (errno == EINVAL) { @@ -1153,19 +1423,19 @@ crename(char *f) static void -crmdir(char *dir) +crmdir(uint8_t *dir) { // The directory name is in dir, coming from the command channel // with the RD: portion stripped off - char *resolved; + uint8_t *resolved; clear_error(); - if ((resolved = resolve_path(dir, true, WILDCARD_DIR)) == NULL) { + if ((resolved = resolve_path_iso(dir, true, WILDCARD_DIR)) == NULL) { set_error(0x39, 0, 0); return; } - if (rmdir(resolved)) { + if (rmdir((char *)resolved)) { if (errno == ENOTEMPTY || errno == EACCES) { set_error(0x63, 0, 0); } else { @@ -1180,19 +1450,19 @@ crmdir(char *dir) static void -cunlink(char *f) +cunlink(uint8_t *f) { // This function receives the whole S command, which could be // "S:FILENAME" or "SCRATCH:FILENAME" or anything in between // let's simply find the first colon and chop it there // TODO path syntax and multiple files - char *tmp = malloc(strlen(f)+1); + uint8_t *tmp = malloc(u8strlen(f)+1); if (tmp == NULL) { set_error(0x70, 0, 0); return; } - strcpy(tmp,f); - char *fn = strchr(tmp,':'); + u8strcpy(tmp,f); + uint8_t *fn = u8strchr(tmp,':'); if (fn == NULL) { // No colon, not a valid scratch command @@ -1202,10 +1472,10 @@ cunlink(char *f) } fn++; // one character after the colon - char *resolved; + uint8_t *resolved; clear_error(); - if ((resolved = resolve_path(fn, true, WILDCARD_PRG)) == NULL) { + if ((resolved = resolve_path_iso(fn, true, WILDCARD_PRG)) == NULL) { free(tmp); set_error(0x62, 0, 0); return; @@ -1213,7 +1483,7 @@ cunlink(char *f) free(tmp); // we're now done with fn (part of tmp) - if (unlink(resolved)) { + if (unlink((char *)resolved)) { if (errno == EACCES) { set_error(0x63, 0, 0); } else { @@ -1237,18 +1507,18 @@ copen(int channel) return -1; } - char *resolved_filename = NULL; - char *parsed_filename = NULL; + uint8_t *resolved_filename = NULL; + uint8_t *parsed_filename = NULL; int ret = -1; // decode ",P,W"-like suffix to know whether we're writing bool append = false; channels[channel].read = true; channels[channel].write = false; - char *first = strchr(channels[channel].name, ','); + uint8_t *first = u8strchr(channels[channel].name, ','); if (first) { *first = 0; // truncate name here - char *second = strchr(first+1, ','); + uint8_t *second = u8strchr(first+1, ','); if (second) { switch(second[1]) { case 'A': @@ -1278,7 +1548,7 @@ copen(int channel) if (!channels[channel].write && channels[channel].name[0] == '$') { dirlist_pos = 0; - if (!strncmp(channels[channel].name,"$=C",3)) { + if (!u8strncmp(channels[channel].name,"$=C",3)) { // This emulates the behavior in the ROM code in // https://github.com/X16Community/x16-rom/pull/5 dirlist_len = create_cwd_listing(dirlist); @@ -1286,7 +1556,7 @@ copen(int channel) dirlist_len = create_directory_listing(dirlist, channels[channel].name); } } else { - if (!strcmp(channels[channel].name, ":*") && prg_file && !prg_consumed) { + if (!u8strcmp(channels[channel].name, ":*") && prg_file && !prg_consumed) { channels[channel].f = prg_file; // special case prg_consumed = true; } else { @@ -1295,7 +1565,7 @@ copen(int channel) return -2; } - resolved_filename = resolve_path(parsed_filename, false, WILDCARD_PRG); + resolved_filename = resolve_path_iso(parsed_filename, false, WILDCARD_PRG); free(parsed_filename); if (resolved_filename == NULL) { @@ -1310,11 +1580,11 @@ copen(int channel) } if (append) { - channels[channel].f = SDL_RWFromFile(resolved_filename, "ab+"); + channels[channel].f = SDL_RWFromFile((char *)resolved_filename, "ab+"); } else if (channels[channel].read && channels[channel].write) { - channels[channel].f = SDL_RWFromFile(resolved_filename, "rb+"); + channels[channel].f = SDL_RWFromFile((char *)resolved_filename, "rb+"); } else { - channels[channel].f = SDL_RWFromFile(resolved_filename, channels[channel].write ? "wb" : "rb"); + channels[channel].f = SDL_RWFromFile((char *)resolved_filename, channels[channel].write ? "wb" : "rb"); } free(resolved_filename); @@ -1370,18 +1640,18 @@ ieee_init() if (fsroot_path == NULL) { // if null, default to cwd // We hold this for the lifetime of the program, and we don't have // any sort of destructor, so we rely on the OS teardown to free() it. - fsroot_path = getcwd(NULL, 0); + fsroot_path = u8getcwd(NULL, 0); } else { // Normalize it - fsroot_path = realpath(fsroot_path, NULL); + fsroot_path = u8realpath(fsroot_path, NULL); } if (startin_path == NULL) { // same as above - startin_path = getcwd(NULL, 0); + startin_path = u8getcwd(NULL, 0); } else { // Normalize it - startin_path = realpath(startin_path, NULL); + startin_path = u8realpath(startin_path, NULL); } // Quick error checks if (fsroot_path == NULL) { @@ -1399,7 +1669,7 @@ ieee_init() // If startin_path is not reachable, we instead default to setting it // back to fsroot_path - if (strncmp(fsroot_path, startin_path, strlen(fsroot_path))) { // not equal + if (u8strncmp(fsroot_path, startin_path, u8strlen(fsroot_path))) { // not equal free(startin_path); startin_path = fsroot_path; } @@ -1423,12 +1693,12 @@ ieee_init() } // Now initialize our emulated cwd. - hostfscwd = malloc(strlen(startin_path)+1); + hostfscwd = malloc(u8strlen(startin_path)+1); if (hostfscwd == NULL) { fprintf(stderr, "Failed to allocate memory for hostfscwd\n"); exit(1); } - strcpy(hostfscwd, startin_path); + u8strcpy(hostfscwd, startin_path); // Locate and remember cbdos_flags variable address in KERNAL vars { diff --git a/src/main.c b/src/main.c index df7f3f09..83ab6cb6 100644 --- a/src/main.c +++ b/src/main.c @@ -111,8 +111,8 @@ bool has_via2 = false; gif_recorder_state_t record_gif = RECORD_GIF_DISABLED; char *gif_path = NULL; char *wav_path = NULL; -char *fsroot_path = NULL; -char *startin_path = NULL; +uint8_t *fsroot_path = NULL; +uint8_t *startin_path = NULL; uint8_t keymap = 0; // KERNAL's default int window_scale = 1; float screen_x_scale = 1.0; @@ -957,7 +957,7 @@ main(int argc, char **argv) if (!argc || argv[0][0] == '-') { usage(); } - fsroot_path = argv[0]; + fsroot_path = (uint8_t *)argv[0]; argc--; argv++; } else if (!strcmp(argv[0], "-startin")) { @@ -966,7 +966,7 @@ main(int argc, char **argv) if (!argc || argv[0][0] == '-') { usage(); } - startin_path = argv[0]; + startin_path = (uint8_t *)argv[0]; argc--; argv++; } else if (!strcmp(argv[0], "-noemucmdkeys")) {