Skip to content

Commit

Permalink
Support UTF character to String class
Browse files Browse the repository at this point in the history
  • Loading branch information
ab25cq committed Nov 23, 2016
1 parent d9f2a5e commit ec8363e
Show file tree
Hide file tree
Showing 13 changed files with 232 additions and 19 deletions.
3 changes: 3 additions & 0 deletions Fundamental.clc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ class Global
str2: String = System.sprintf(str, params);
System.print(str2);
}
# Writes str through System.print exactly as given; the text is not passed through System.sprintf.
def printf(str:String): static {
System.print(str);
}
def printfToError(str:String, params:Array<Object>): static {
str2: String = System.sprintf(str, params);
System.printToError(str2);
Expand Down
2 changes: 2 additions & 0 deletions code/regex.cl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ a:regex = /^A/;
pcre_ovec: PcreOVec = new PcreOVec(3);

Clover.test("regex test", System.pcre_exec(a, "ABC", 0, 3, pcre_ovec) >= 0);

printf("ovec.start[0] %d ovec.end[0] %d\n", array { pcre_ovec.start[0].toInteger, pcre_ovec.end[0].toInteger } );
Clover.test("regex test", pcre_ovec.start[0] == 0 && pcre_ovec.end[0] == 1);

pcre_ovec2: PcreOVec = new PcreOVec(3);
Expand Down
2 changes: 1 addition & 1 deletion code/string2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ Clover.test("string2 test29", "abc".compareWithIgnoreCase("ABC") == 0);
Clover.test("string2 test30", "0xFF".hex() == 255);
Clover.test("string2 test31", "010".oct() == 8);

Clover.test("string2 test32", "あいう".match(/い/));
Clover.test("string2 test32", "あいう".scan(/./).equals(list {"あ", "い", "う"}));

2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -3946,7 +3946,7 @@ CFLAGS="$CFLAGS -Qunused-arguments"
OBJS="src/main.o src/vm.o src/object.o src/array.o src/string.o src/block.o src/regex.o src/integer.o src/byte.o src/short.o src/long.o src/float.o src/pointer.o src/char.o src/bool.o src/hash.o src/list.o src/tuple.o src/carray.o src/type.o src/stack.o src/heap.o src/exception.o src/native_method.o src/class_system.o src/class_clover.o src/clover_to_clang.o"
COMPILER_OBJS="src/compiler.o src/parser.o src/node_type.o src/node.o src/cast.o src/vtable.o src/script.o src/node_block.o src/node_block_type.o src/class_compiler.o src/klass_compile_time.o src/method_compiler.o src/module.o"
LIB_OBJS="src/buffer.o src/debug.o src/xfunc.o src/klass.o src/constant.o src/code.o src/alignment.o"
LIB_OBJS="src/buffer.o src/debug.o src/xfunc.o src/klass.o src/constant.o src/code.o src/alignment.o src/utf.o"
Expand Down
2 changes: 1 addition & 1 deletion configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ AC_SUBST(CFLAGS)

OBJS="src/main.o src/vm.o src/object.o src/array.o src/string.o src/block.o src/regex.o src/integer.o src/byte.o src/short.o src/long.o src/float.o src/pointer.o src/char.o src/bool.o src/hash.o src/list.o src/tuple.o src/carray.o src/type.o src/stack.o src/heap.o src/exception.o src/native_method.o src/class_system.o src/class_clover.o src/clover_to_clang.o"
COMPILER_OBJS="src/compiler.o src/parser.o src/node_type.o src/node.o src/cast.o src/vtable.o src/script.o src/node_block.o src/node_block_type.o src/class_compiler.o src/klass_compile_time.o src/method_compiler.o src/module.o"
LIB_OBJS="src/buffer.o src/debug.o src/xfunc.o src/klass.o src/constant.o src/code.o src/alignment.o"
LIB_OBJS="src/buffer.o src/debug.o src/xfunc.o src/klass.o src/constant.o src/code.o src/alignment.o src/utf.o"

AC_SUBST(OBJS)
AC_SUBST(COMPILER_OBJS)
Expand Down
119 changes: 119 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#! /bin/sh

#
# install - install a program, script, or datafile
# This comes from X11R5; it is not part of GNU.
#
# $XConsortium: install.sh,v 1.2 89/12/18 14:47:22 jim Exp $
#
# This script is compatible with the BSD install script, but was written
# from scratch.
#
# Usage: install.sh [-c] [-s] [-m mode] [-o owner] [-g group] src dst
#
#   -c        copy src instead of moving it (the default is to move)
#   -s        run the strip program on the installed file
#   -m mode   run chmod with the given mode on the installed file
#   -o owner  run chown with the given owner on the installed file
#   -g group  run chgrp with the given group on the installed file
#
# The file is first placed under a temporary name inside the destination
# directory and only then renamed over dst.
#
# Exit status: 0 on success, 1 when src or dst is missing or when the file
# could not be placed at its destination.
#


# set DOITPROG to echo to test this script

# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"


# put in absolute paths if you don't have them in your path; or use env. vars.

mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"

instcmd="$mvprog"
chmodcmd=""
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""

# Parse the command line.  The first non-option word is the source; every
# later non-option word overwrites the destination, so the last one wins.

while [ x"$1" != x ]; do
    case $1 in
        -c) instcmd="$cpprog"
            shift
            continue;;

        -m) chmodcmd="$chmodprog $2"
            shift
            shift
            continue;;

        -o) chowncmd="$chownprog $2"
            shift
            shift
            continue;;

        -g) chgrpcmd="$chgrpprog $2"
            shift
            shift
            continue;;

        -s) stripcmd="$stripprog"
            shift
            continue;;

        *)  if [ x"$src" = x ]
            then
                src=$1
            else
                dst=$1
            fi
            shift
            continue;;
    esac
done

# Diagnostics go to stderr so they are not mistaken for regular output.

if [ x"$src" = x ]
then
    echo "install: no input file specified" >&2
    exit 1
fi

if [ x"$dst" = x ]
then
    echo "install: no destination specified" >&2
    exit 1
fi


# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic

# Path variables are quoted from here on so that names containing blanks
# or glob characters are passed through as a single word.  The *cmd
# variables stay unquoted on purpose: they hold "program argument" pairs
# that must be split into words.

if [ -d "$dst" ]
then
    dst="$dst"/`basename "$src"`
fi

# Make a temp file name in the proper directory.

dstdir=`dirname "$dst"`
dsttmp="$dstdir/#inst.$$#"

# Move or copy the file name to the temp name.  Stop right here when that
# fails: carrying on would remove the existing destination below without
# having anything to put in its place.

$doit $instcmd "$src" "$dsttmp" || exit 1

# and set any options; do chmod last to preserve setuid bits
# (a failure of one of these steps does not abort the installation)

if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; fi
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; fi
if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; fi
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; fi

# Now rename the file to the real destination, and report a failure of
# that final step through the exit status instead of always claiming
# success.

$doit $rmcmd "$dst" && $doit $mvcmd "$dsttmp" "$dst" || exit 1


exit 0
16 changes: 7 additions & 9 deletions src/class_system.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,14 @@ BOOL System_pcre_exec(CLVALUE** stack_ptr, CLVALUE* lvar, sVMInfo* info)

pcre* regex_value = regex_object_data->mRegex;

wchar_t* wstr = ALLOC string_object_to_wchar_array(str->mObjectValue);
char* str_value = string_object_to_char_array(str->mObjectValue);

int len = wcslen(wstr);

char* str_value = ALLOC xwcstombs(wstr, len);

MFREE(wstr);
int len = strlen(str_value);

int ovec_max_value = ovec_max->mIntValue;
int* ovec_value = MCALLOC(1, sizeof(int)*ovec_max_value * 3);

int offset_value = offset->mIntValue;
int offset_value = utf32_index_to_utf8_index(str_value, offset->mIntValue);

/// go ///
int options = PCRE_NEWLINE_LF;
Expand All @@ -336,10 +332,12 @@ BOOL System_pcre_exec(CLVALUE** stack_ptr, CLVALUE* lvar, sVMInfo* info)
int i;
for(i=0; i<ovec_max_value; i++) {
if(i < pcre_ovec_start_array_data->mArrayNum) {
pcre_ovec_start_array_data->mFields[i].mIntValue = ovec_value[i*2];
int utf32index = utf8_index_to_utf32_index(str_value, ovec_value[i*2]);
pcre_ovec_start_array_data->mFields[i].mIntValue = utf32index;
}
if(i < pcre_ovec_end_array_data->mArrayNum) {
pcre_ovec_end_array_data->mFields[i].mIntValue = ovec_value[i*2+1];
int utf32index = utf8_index_to_utf32_index(str_value, ovec_value[i*2+1]);
pcre_ovec_end_array_data->mFields[i].mIntValue = utf32index;
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1786,5 +1786,9 @@ BOOL initialize_carray_object(CLObject array_object, int num_elements, CLObject*
BOOL read_source(char* fname, sBuf* source);
BOOL delete_comment(sBuf* source, sBuf* source2);

/// utf.c ///
int utf8_index_to_utf32_index(char* str, int utf8index);
int utf32_index_to_utf8_index(char* str, int utf32index);

#endif

1 change: 0 additions & 1 deletion src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -1932,7 +1932,6 @@ static BOOL expression_node(unsigned int* node, sParserInfo* info)

*node = sNodeTree_create_character_value(c);
}

}
else if(*info->p == '{') {
info->p++;
Expand Down
2 changes: 1 addition & 1 deletion src/regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ CLObject create_regex_object(char* regex, BOOL global, BOOL ignore_case, BOOL mu
int erro_ofs;

int options = PCRE_UTF8 | (ignore_case ? PCRE_CASELESS:0) | (multiline ? PCRE_MULTILINE : 0) | (extended ? PCRE_EXTENDED :0) | (dotall ? PCRE_DOTALL :0) | (dollar_endonly ? PCRE_DOLLAR_ENDONLY:0) | (ungreedy ? PCRE_UNGREEDY:0);
//int options = PCRE_UTF8 | (ignore_case ? PCRE_CASELESS:0) | (multiline ? PCRE_MULTILINE : 0) | (extended ? PCRE_EXTENDED :0) | (dotall ? PCRE_DOTALL :0) | (anchored ? PCRE_ANCHORED : 0) | (dollar_endonly ? PCRE_DOLLAR_ENDONLY) | (ungreedy ? PCRE_UNGREEDY);
//int options = PCRE_UTF32 | (ignore_case ? PCRE_CASELESS:0) | (multiline ? PCRE_MULTILINE : 0) | (extended ? PCRE_EXTENDED :0) | (dotall ? PCRE_DOTALL :0) | (anchored ? PCRE_ANCHORED : 0) | (dollar_endonly ? PCRE_DOLLAR_ENDONLY) | (ungreedy ? PCRE_UNGREEDY);

object_data->mRegex = pcre_compile(regex, options,&err, &erro_ofs, NULL);
object_data->mGlobal = global;
Expand Down
11 changes: 7 additions & 4 deletions src/string.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "common.h"
#include <wchar.h>

CLObject create_string_object(char* str)
{
Expand All @@ -9,6 +10,8 @@ CLObject create_string_object(char* str)

(void)mbstowcs(wstr, str, len+1);

int wlen = wcslen(wstr);

/// create object ///
sCLClass* string_class = get_class("String");
MASSERT(string_class != NULL);
Expand All @@ -23,20 +26,20 @@ CLObject create_string_object(char* str)
sCLClass* char_class = get_class("char");
MASSERT(char_class != NULL);

CLObject buffer = create_array_object(char_class, len+1);
CLObject buffer = create_array_object(char_class, wlen+1);
sCLObject* buffer_data = CLOBJECT(buffer);

int i;
for(i=0; i<len; i++) {
for(i=0; i<wlen; i++) {
buffer_data->mFields[i].mCharValue = wstr[i];
}
buffer_data->mFields[i].mCharValue = '\0';

/// entry char array to object ///
sCLObject* obj_data = CLOBJECT(obj);
obj_data->mFields[0].mObjectValue = buffer;
obj_data->mFields[1].mIntValue = len + 1;
obj_data->mFields[2].mIntValue = len;
obj_data->mFields[1].mIntValue = wlen + 1;
obj_data->mFields[2].mIntValue = wlen;

/// pop object ///
gGlobalStackPtr--;
Expand Down
83 changes: 83 additions & 0 deletions src/utf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include "common.h"

/// Convert a byte offset inside a NUL terminated UTF-8 string to a ///
/// character (code point) index. ///
///
/// str        : NUL terminated UTF-8 text
/// utf8_index : byte offset measured from the head of str
///
/// result     : number of characters that start before utf8_index.
///              An offset beyond the end of the text gives the total
///              character count. An offset in the middle of a character
///              counts that character as well.
///              -1 when utf8_index is negative (for example the -1 that
///              PCRE stores for a capture group that did not take part
///              in the match) or when a byte that can not start a UTF-8
///              sequence is found.
int utf8_index_to_utf32_index(char* str, int utf8_index)
{
    const unsigned char* head = (const unsigned char*)str;
    const unsigned char* p = head;

    int utf32_index = 0;

    if(utf8_index < 0) {
        return -1;
    }

    while(*p && p - head < utf8_index) {
        int size;

        /// the length of a sequence is decided by the leading bit ///
        /// pattern of its first byte, not by the number of set bits ///
        /// (0xD0 - 0xDF start a 2 byte sequence) ///
        if(*p < 0x80) {
            size = 1;                   // 0xxxxxxx ascii
        }
        else if((*p & 0xE0) == 0xC0) {
            size = 2;                   // 110xxxxx
        }
        else if((*p & 0xF0) == 0xE0) {
            size = 3;                   // 1110xxxx
        }
        else if((*p & 0xF8) == 0xF0) {
            size = 4;                   // 11110xxx
        }
        else {
            /// stray continuation byte or invalid lead byte ///
            return -1;
        }

        /// a truncated sequence must not carry p over the terminator ///
        while(size > 0 && *p) {
            p++;
            size--;
        }

        utf32_index++;
    }

    return utf32_index;
}

/// Convert a character (code point) index to the byte offset of that ///
/// character inside a NUL terminated UTF-8 string. ///
///
/// str         : NUL terminated UTF-8 text
/// utf32_index : index counted in characters from the head of str
///
/// result      : byte offset at which the character starts. An index
///               beyond the end of the text gives the byte length of the
///               text.
///               -1 when utf32_index is negative or when a byte that can
///               not start a UTF-8 sequence is found.
int utf32_index_to_utf8_index(char* str, int utf32_index)
{
    const unsigned char* head = (const unsigned char*)str;
    const unsigned char* p = head;

    int passed_chars = 0;

    if(utf32_index < 0) {
        return -1;
    }

    while(*p && passed_chars < utf32_index) {
        int size;

        /// the length of a sequence is decided by the leading bit ///
        /// pattern of its first byte, not by the number of set bits ///
        /// (0xD0 - 0xDF start a 2 byte sequence) ///
        if(*p < 0x80) {
            size = 1;                   // 0xxxxxxx ascii
        }
        else if((*p & 0xE0) == 0xC0) {
            size = 2;                   // 110xxxxx
        }
        else if((*p & 0xF0) == 0xE0) {
            size = 3;                   // 1110xxxx
        }
        else if((*p & 0xF8) == 0xF0) {
            size = 4;                   // 11110xxx
        }
        else {
            /// stray continuation byte or invalid lead byte ///
            return -1;
        }

        /// a truncated sequence must not carry p over the terminator ///
        while(size > 0 && *p) {
            p++;
            size--;
        }

        passed_chars++;
    }

    return (int)(p - head);
}
4 changes: 3 additions & 1 deletion tags
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ LD_LIBRARY_PATH Makefile /^ export LD_LIBRARY_PATH=.$(LD_LIBRARY_PATH); export L
LIBS Makefile /^LIBS= -lutil -ldl -lm -lpcre$/;" m
LIBSO2NAME Makefile /^LIBSO2NAME=libclover2.so.1.0.0$/;" m
LIBSONAME Makefile /^LIBSONAME=libclover2.so$/;" m
LIB_OBJS Makefile /^LIB_OBJS=src\/buffer.o src\/debug.o src\/xfunc.o src\/klass.o src\/constant.o src\/code.o src\/alignment.o$/;" m
LIB_OBJS Makefile /^LIB_OBJS=src\/buffer.o src\/debug.o src\/xfunc.o src\/klass.o src\/constant.o src\/code.o src\/alignment.o src\/utf.o$/;" m
LIST_VALUE_ELEMENT_MAX src/common.h 44;" d
LOCAL_VARIABLE_MAX src/common.h 339;" d
MACROS_H src/macros.h 2;" d
Expand Down Expand Up @@ -1966,6 +1966,8 @@ uValue src/common.h /^ } uValue;$/;" m struct:sNodeTreeStruct typeref:union:s
unboxig_posibility src/node_type.c /^BOOL unboxig_posibility(sCLClass* klass)$/;" f
unboxing_to_primitive_type src/node_type.c /^BOOL unboxing_to_primitive_type(sNodeType** left_type, struct sCompileInfoStruct* info)$/;" f
unload_module src/module.c /^void unload_module(char* module_name)$/;" f
utf32_index_to_utf8_index src/utf.c /^int utf32_index_to_utf8_index(char* str, int utf32_index)$/;" f
utf8_index_to_utf32_index src/utf.c /^int utf8_index_to_utf32_index(char* str, int utf8_index)$/;" f
vm src/vm.c /^BOOL vm(sByteCode* code, sConst* constant, CLVALUE* stack, int var_num, sCLClass* klass, sVMInfo* info)$/;" f
vm_mutex_off src/vm.c /^void vm_mutex_off()$/;" f
vm_mutex_on src/vm.c /^void vm_mutex_on()$/;" f
Expand Down

0 comments on commit ec8363e

Please sign in to comment.