forked from hollowaykeanho/Upscaler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
init: ported To_Unicode_From_UTF32 primitive function
Since a number of level 1 Hestia libraries use string functions, we have to port the primitive ones into the HestiaKERNEL library package. Hence, let's do this. This patch ports the To_Unicode_From_UTF32 primitive function into the HestiaKERNEL library in the init/ directory. Co-authored-by: Shuralyov, Jean <[email protected]> Co-authored-by: Galyna, Cory <[email protected]> Co-authored-by: (Holloway) Chew, Kean Ho <[email protected]> Signed-off-by: (Holloway) Chew, Kean Ho <[email protected]>
- Loading branch information
1 parent
d6cd126
commit 235bec1
Showing
7 changed files
with
338 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,6 @@ | |
# | ||
# You MUST ensure any interaction with the content STRICTLY COMPLIES with | ||
# the permissions and limitations set forth in the license. | ||
# Copyright 2024 (Holloway) Chew, Kean Ho <[email protected]> | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Error_Codes.sh" | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Unicode.sh" | ||
|
||
|
@@ -31,6 +30,7 @@ HestiaKERNEL_Is_UTF() { | |
___content="$1" | ||
___count=8 | ||
___utf8_expect=0 | ||
___utf32_expect=0 | ||
___byte_0="" | ||
___byte_1="" | ||
___byte_2="" | ||
|
@@ -112,6 +112,12 @@ HestiaKERNEL_Is_UTF() { | |
fi | ||
|
||
|
||
# detect UTF-32 for later guessing | ||
if [ $___count -le 4 ]; then | ||
___utf32_expect=1 | ||
fi | ||
|
||
|
||
# prepare for next scan | ||
___count=$(($___count - 1)) | ||
done | ||
|
@@ -168,7 +174,7 @@ ${HestiaKERNEL_UTF8} | |
${___output}" | ||
fi | ||
|
||
if [ $((${#1} % 4)) -eq 0 ]; then | ||
if [ $___utf32_expect -gt 0 ]; then | ||
___output="\ | ||
${___output} | ||
${HestiaKERNEL_UTF32BE} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# Copyright 2024 (Holloway) Chew, Kean Ho <[email protected]> | ||
# | ||
# | ||
# Licensed under (Holloway) Chew, Kean Ho’s Liberal License (the "License"). | ||
# You must comply with the license to use the content. Get the License at: | ||
# | ||
# https://doi.org/10.5281/zenodo.13770769 | ||
# | ||
# You MUST ensure any interaction with the content STRICTLY COMPLIES with | ||
# the permissions and limitations set forth in the license. | ||
. "${env:LIBS_HESTIA}\HestiaKERNEL\Endian.ps1" | ||
. "${env:LIBS_HESTIA}\HestiaKERNEL\Is_Array_Byte.ps1" | ||
. "${env:LIBS_HESTIA}\HestiaKERNEL\Is_UTF.ps1" | ||
. "${env:LIBS_HESTIA}\HestiaKERNEL\Unicode.ps1" | ||
|
||
|
||
|
||
|
||
# HestiaKERNEL-To-Unicode-From-UTF32 decodes a UTF-32 byte array into an array
# of 32-bit values, producing one value for every complete group of 4 bytes.
#
# Parameters:
#   $___input_content - the UTF-32 encoded bytes, with or without a leading BOM.
#   $___input_endian  - endianness hint. It is only consulted when
#                       HestiaKERNEL-Is-UTF reports both UTF32LE and UTF32BE
#                       (without BOM) as candidates, and only when it equals
#                       HestiaKERNEL_ENDIAN_LITTLE or HestiaKERNEL_ENDIAN_BIG.
#
# Returns:
#   [uint32[]] holding the decoded values. An empty array is returned when the
#   input is empty, fails the HestiaKERNEL-Is-Array-Byte check, or is not
#   reported as UTF-32 by HestiaKERNEL-Is-UTF. Trailing bytes that do not form
#   a complete group of 4 are dropped.
function HestiaKERNEL-To-Unicode-From-UTF32 {
        param (
                [byte[]]$___input_content,
                [int]$___input_endian
        )


        # validate input
        if ($___input_content.Length -eq 0) {
                return [uint32[]]@()
        }

        if ($(HestiaKERNEL-Is-Array-Byte $___input_content) -ne ${env:HestiaKERNEL_ERROR_OK}) {
                return [uint32[]]@()
        }


        # execute
        ## IMPORTANT NOTICE
        ## PowerShell does not handle a UTF-32 byte stream in an isolated manner
        ## without messing up the current terminal's environment variables
        ## (e.g. $OutputEncoding). To avoid it, a manual implementation is
        ## required.
        ##
        ## From the Unicode engineering specification, the default endian is
        ## big-endian.


        # check for data encoder
        $___endian = ${env:HestiaKERNEL_ENDIAN_BIG}
        $___ignore = 0
        $___output = HestiaKERNEL-Is-UTF $___input_content
        if ($($___output -replace "${env:HestiaKERNEL_UTF32LE_BOM}", '') -ne $___output) {
                # it's UTF32LE with BOM marker
                $___endian = ${env:HestiaKERNEL_ENDIAN_LITTLE}
                $___ignore = 4 # a UTF-32 BOM occupies 4 bytes; the loop below walks bytes
        } elseif ($($___output -replace "${env:HestiaKERNEL_UTF32BE_BOM}", '') -ne $___output) {
                # it's UTF32BE with BOM marker
                $___endian = ${env:HestiaKERNEL_ENDIAN_BIG}
                $___ignore = 4 # a UTF-32 BOM occupies 4 bytes; the loop below walks bytes
        } elseif (
                ($($___output -replace "${env:HestiaKERNEL_UTF32LE}", '') -ne $___output) -and
                ($($___output -replace "${env:HestiaKERNEL_UTF32BE}", '') -ne $___output)
        ) {
                # both UTF32LE and UTF32BE are candidates (no BOM to decide)
                if (
                        ($___input_endian -eq ${env:HestiaKERNEL_ENDIAN_LITTLE}) -or
                        ($___input_endian -eq ${env:HestiaKERNEL_ENDIAN_BIG})
                ) {
                        $___endian = $___input_endian # if there is a valid hint, take the hint
                } else {
                        # keep the default (big-endian)
                }
        } else {
                # not a UTF-32 byte array
                return [uint32[]]@()
        }


        # process to unicode
        ## Each byte is widened to uint32 first so that the left shifts below
        ## operate on 32-bit values. $___state counts the position (0-3) of the
        ## current byte inside its 4-byte group.
        $___content = [uint32[]]$___input_content
        [System.Collections.Generic.List[uint32]]$___converted = @()
        $___char = [uint32]0
        $___state = 0
        foreach ($___byte in $___content) {
                # ignore BOM markers
                if ($___ignore -gt 0) {
                        $___ignore = $___ignore - 1
                        continue
                }


                # process byte data serially
                switch ($___state) {
                3 {
                        # 4th byte: most significant for LE, least significant for BE
                        switch ($___endian) {
                        ${env:HestiaKERNEL_ENDIAN_LITTLE} {
                                $___byte = $___byte -shl 24
                                $___char = $___char -bor $___byte
                        } default {
                                $___char = $___char -bor $___byte
                        }}
                        $null = $___converted.Add($___char)

                        $___state = 0
                } 2 {
                        # 3rd byte
                        switch ($___endian) {
                        ${env:HestiaKERNEL_ENDIAN_LITTLE} {
                                $___byte = $___byte -shl 16
                                $___char = $___char -bor $___byte
                        } default {
                                $___byte = $___byte -shl 8
                                $___char = $___char -bor $___byte
                        }}

                        $___state = 3
                } 1 {
                        # 2nd byte
                        switch ($___endian) {
                        ${env:HestiaKERNEL_ENDIAN_LITTLE} {
                                $___byte = $___byte -shl 8
                                $___char = $___char -bor $___byte
                        } default {
                                $___byte = $___byte -shl 16
                                $___char = $___char -bor $___byte
                        }}

                        $___state = 2
                } default {
                        # 1st byte: starts a new value, overwriting the previous one
                        switch ($___endian) {
                        ${env:HestiaKERNEL_ENDIAN_LITTLE} {
                                $___char = $___byte
                        } default {
                                $___char = $___byte -shl 24
                        }}

                        $___state = 1
                }}
        }


        # report status
        return [uint32[]]$___converted
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#!/bin/sh | ||
# Copyright 2024 (Holloway) Chew, Kean Ho <[email protected]> | ||
# | ||
# | ||
# Licensed under (Holloway) Chew, Kean Ho’s Liberal License (the "License"). | ||
# You must comply with the license to use the content. Get the License at: | ||
# | ||
# https://doi.org/10.5281/zenodo.13770769 | ||
# | ||
# You MUST ensure any interaction with the content STRICTLY COMPLIES with | ||
# the permissions and limitations set forth in the license. | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Endian.sh" | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Error_Codes.sh" | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Is_Array_Byte.sh" | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Is_UTF.sh" | ||
. "${LIBS_HESTIA}/HestiaKERNEL/Unicode.sh" | ||
|
||
|
||
|
||
|
||
# HestiaKERNEL_To_Unicode_From_UTF32 decodes a UTF-32 byte list into a list of
# decimal values, producing one value for every complete group of 4 bytes.
#
# Arguments:
#   $1 - the UTF-32 encoded bytes as a ", " separated list of numbers, with or
#        without a leading BOM.
#   $2 - endianness hint. It is only consulted when HestiaKERNEL_Is_UTF reports
#        both UTF32LE and UTF32BE (without BOM) as candidates, and only when it
#        equals $HestiaKERNEL_ENDIAN_LITTLE or $HestiaKERNEL_ENDIAN_BIG.
#
# Output (stdout):
#   The decoded values as a ", " separated list of decimal numbers. Nothing is
#   printed on failure. Trailing bytes that do not form a complete group of 4
#   are dropped.
#
# Returns:
#   $HestiaKERNEL_ERROR_OK           - conversion completed.
#   $HestiaKERNEL_ERROR_DATA_EMPTY   - $1 is empty.
#   $HestiaKERNEL_ERROR_DATA_INVALID - $1 fails the HestiaKERNEL_Is_Array_Byte
#                                      check or is not reported as UTF-32.
HestiaKERNEL_To_Unicode_From_UTF32() {
        #___input_content="$1"
        #___input_endian="$2"


        # validate input
        if [ "$1" = "" ]; then
                printf -- ""
                return $HestiaKERNEL_ERROR_DATA_EMPTY
        fi

        ## Fail closed: when the validator prints nothing or a non-number, the
        ## numeric test itself errors out; treat that as invalid data rather
        ## than letting unvalidated input through.
        ___status="$(HestiaKERNEL_Is_Array_Byte "$1")"
        if ! [ "$___status" -eq "$HestiaKERNEL_ERROR_OK" ] 2> /dev/null; then
                printf -- ""
                return $HestiaKERNEL_ERROR_DATA_INVALID
        fi


        # execute
        ## IMPORTANT NOTICE
        ## POSIX Shell does not handle a UTF-32 byte stream in an isolated
        ## manner without messing up the current terminal's $LANG settings. To
        ## avoid it, a manual implementation is required.
        ##
        ## From the Unicode engineering specification, the default endian is
        ## big-endian.


        # check for data encoder
        ___endian=$HestiaKERNEL_ENDIAN_BIG
        ___ignore=0
        ___output="$(HestiaKERNEL_Is_UTF "$1")"
        if [ ! "${___output#*"$HestiaKERNEL_UTF32LE_BOM"}" = "$___output" ]; then
                # it's UTF32LE with BOM marker (4 bytes to skip)
                ___endian=$HestiaKERNEL_ENDIAN_LITTLE
                ___ignore=4
        elif [ ! "${___output#*"$HestiaKERNEL_UTF32BE_BOM"}" = "$___output" ]; then
                # it's UTF32BE with BOM marker (4 bytes to skip)
                ___endian=$HestiaKERNEL_ENDIAN_BIG
                ___ignore=4
        elif [ ! "${___output#*"$HestiaKERNEL_UTF32LE"}" = "$___output" ] &&
                [ ! "${___output#*"$HestiaKERNEL_UTF32BE"}" = "$___output" ]; then
                # both UTF32LE and UTF32BE are candidates (no BOM to decide)
                if [ "$2" = "$HestiaKERNEL_ENDIAN_LITTLE" ] ||
                        [ "$2" = "$HestiaKERNEL_ENDIAN_BIG" ]; then
                        ___endian="$2" # if there is a valid hint, take the hint
                else
                        : # keep the default (big-endian)
                fi
        else
                # not a UTF-32 byte array
                printf -- ""
                return $HestiaKERNEL_ERROR_DATA_INVALID
        fi


        # process to unicode
        ## $___state counts the position (0-3) of the current byte inside its
        ## 4-byte group.
        ___content="$1"
        ___converted=""
        ___char=0
        ___state=0
        while [ ! "$___content" = "" ]; do
                # get current byte: take everything before the first ", ", then
                # strip that byte and its separator from the remaining content
                ___byte="${___content%%, *}"
                ___content="${___content#"$___byte"}"
                if [ "${___content%"${___content#?}"}" = "," ]; then
                        ___content="${___content#, }"
                fi


                # ignore BOM markers
                if [ $___ignore -gt 0 ]; then
                        ___ignore=$(($___ignore - 1))
                        continue
                fi


                # process byte data serially
                case "$___state" in
                3)
                        # 4th byte: most significant for LE, least significant for BE
                        case "$___endian" in
                        $HestiaKERNEL_ENDIAN_LITTLE)
                                ___byte=$(($___byte << 24))
                                ___char=$(($___char | $___byte))
                                ;;
                        *)
                                ___char=$(($___char | $___byte))
                                ;;
                        esac

                        ## $___char is the result of an arithmetic expansion
                        ## here, so it is already a decimal number; no printf
                        ## subshell is needed to format it.
                        ___converted="${___converted}${___char}, "

                        ___state=0
                        ;;
                2)
                        # 3rd byte
                        case "$___endian" in
                        $HestiaKERNEL_ENDIAN_LITTLE)
                                ___byte=$(($___byte << 16))
                                ___char=$(($___char | $___byte))
                                ;;
                        *)
                                ___byte=$(($___byte << 8))
                                ___char=$(($___char | $___byte))
                                ;;
                        esac

                        ___state=3
                        ;;
                1)
                        # 2nd byte
                        case "$___endian" in
                        $HestiaKERNEL_ENDIAN_LITTLE)
                                ___byte=$(($___byte << 8))
                                ___char=$(($___char | $___byte))
                                ;;
                        *)
                                ___byte=$(($___byte << 16))
                                ___char=$(($___char | $___byte))
                                ;;
                        esac

                        ___state=2
                        ;;
                *)
                        # 1st byte: starts a new value, overwriting the previous one
                        case "$___endian" in
                        $HestiaKERNEL_ENDIAN_LITTLE)
                                ___char=$___byte
                                ;;
                        *)
                                ___char=$(($___byte << 24))
                                ;;
                        esac

                        ___state=1
                        ;;
                esac
        done


        # report status
        printf -- "%s" "${___converted%, }"
        return $HestiaKERNEL_ERROR_OK
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.