Skip to content

Commit

Permalink
Merge pull request #45 from fleeksoft/develop
Browse files Browse the repository at this point in the history
enable js resource tests and update with jsoup latest code
  • Loading branch information
itboy87 authored Aug 9, 2024
2 parents 0086bd2 + e21c475 commit 579adab
Show file tree
Hide file tree
Showing 67 changed files with 1,991 additions and 1,358 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches: [ "release" ]
paths-ignore: [ "**.md" ]
pull_request:
branches: [ "release" ]
branches: [ "release", "develop" ]
paths-ignore: [ "**.md" ]

jobs:
Expand Down Expand Up @@ -64,6 +64,7 @@ jobs:
# ORG_GRADLE_PROJECT_signingInMemoryKeyPassword: ${{ secrets.GPG_KEY_PASSWORD }}
#
deploy_docs:
if: github.ref == 'refs/heads/release'
runs-on: macos-latest
needs:
- build
Expand Down
11 changes: 10 additions & 1 deletion ksoup-test/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ val generateBuildConfigFile: Task by tasks.creating {
}
}

tasks.all {
tasks.configureEach {
if (name != generateBuildConfigFile.name && !name.contains("publish", ignoreCase = true)) {
dependsOn(generateBuildConfigFile.name)
}
Expand All @@ -34,4 +34,13 @@ kotlin {
this.kotlin.srcDir(layout.buildDirectory.file(rootPath))
}
}
// Configure the Kotlin/JS (IR) target's browser test task: tests run through Mocha
// with the timeout set to "9s".
// NOTE(review): presumably raised because the JS tests now load their fixtures over HTTP
// rather than from the local file system — confirm.
js(IR) {
browser {
testTask {
useMocha {
timeout = "9s"
}
}
}
}
}
1 change: 1 addition & 0 deletions ksoup-test/module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ test-dependencies:
- $libs.codepoints
- $libs.kotlinx.coroutines.test
- $libs.kotlinx.datetime
- $libs.stately.concurrent

settings:
kotlin:
Expand Down
4 changes: 0 additions & 4 deletions ksoup-test/test/com/fleeksoft/ksoup/GzipTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ import kotlin.test.assertEquals
class GzipTest {
@Test
fun testReadGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val gzipFileStr = TestHelper.readGzipResource("htmltests/gzip.html.gz").readAll()
.toString(charset = Charsets.UTF8)
val expected = """<title>Gzip test</title>
Expand Down
21 changes: 11 additions & 10 deletions ksoup-test/test/com/fleeksoft/ksoup/TestHelper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ import korlibs.io.stream.readAll
object TestHelper {

suspend fun readGzipResource(file: String): SyncStream {
return readGzipFile(getResourceAbsolutePath(file))
return readGzipFile(getResourceAbsolutePath(file).uniVfs)
}

fun getResourceAbsolutePath(resourceName: String): String {
if (Platform.current == PlatformType.WINDOWS) {
if (Platform.isWindows()) {
return "../../../../testResources/$resourceName"
} else if (Platform.isJS()) {
return "https://raw.githubusercontent.com/fleeksoft/ksoup/release/ksoup-test/testResources/$resourceName"
}
return "${BuildConfig.PROJECT_ROOT}/ksoup-test/testResources/$resourceName"
}

suspend fun getFileAsString(file: VfsFile): String {
val bytes: ByteArray =
if (file.fullName.endsWith(".gz")) {
readGzipFile(file.absolutePath).readAll()
} else {
readFile(file.absolutePath).readAll()
}
val bytes: ByteArray = if (file.fullName.endsWith(".gz")) {
readGzipFile(file).readAll()
} else {
readFile(file).readAll()
}
return bytes.decodeToString()
}

Expand All @@ -36,9 +37,9 @@ object TestHelper {

suspend fun pathToStream(file: VfsFile): SyncStream {
return if (file.fullName.endsWith(".gz")) {
readGzipFile(file.absolutePath)
readGzipFile(file)
} else {
readFile(file.absolutePath)
readFile(file)
}
}
}
90 changes: 54 additions & 36 deletions ksoup-test/test/com/fleeksoft/ksoup/helper/DataUtilTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,6 @@ class DataUtilTest {

@Test
fun supportsBOMinFiles() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
var input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf16be.html")
var doc: Document =
Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
Expand All @@ -187,7 +183,7 @@ class DataUtilTest {
assertTrue(doc.title().contains("UTF-16LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))

if (Platform.isJS()) {
if (Platform.isJS() || Platform.isWindows()) {
// FIXME: UTF-32 charset not supported
return@runTest
}
Expand All @@ -203,32 +199,58 @@ class DataUtilTest {
}

@Test
fun supportsUTF8BOM() = runTest {
if (Platform.isJS()) {
// js resource access issue
fun streamerSupportsBOMinFiles() = runTest {
    // test files from http://www.i18nl10n.com/korean/utftest/
    val htmlParser = Parser.htmlParser()

    // Stream-parses one BOM fixture with no charset hint, then checks that the title names
    // the expected encoding and that the Korean sample text is present in the document text.
    suspend fun verifyFixture(resourceName: String, encodingLabel: String) {
        val fixture = TestHelper.getResourceAbsolutePath(resourceName).uniVfs
        val parsed: Document = DataUtil.streamParser(
            file = fixture,
            baseUri = "http://example.com",
            charset = null,
            parser = htmlParser,
        ).complete()
        assertTrue(parsed.title().contains(encodingLabel))
        assertTrue(parsed.text().contains("가각갂갃간갅"))
    }

    verifyFixture("bomtests/bom_utf16be.html", "UTF-16BE")
    verifyFixture("bomtests/bom_utf16le.html", "UTF-16LE")

    // The UTF-32 fixtures are skipped on JS and Windows.
    if (Platform.isJS() || Platform.isWindows()) {
        // FIXME: UTF-32 charset not supported
        return@runTest
    }

    verifyFixture("bomtests/bom_utf32be.html", "UTF-32BE")
    verifyFixture("bomtests/bom_utf32le.html", "UTF-32LE")
}

/**
 * Parses the `bomtests/bom_utf8.html` fixture through [Ksoup.parseFile] with no charset hint
 * and expects the document's `<title>` text to be "OK".
 */
@Test
fun supportsUTF8BOM() = runTest {
    val fixturePath = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html")
    val parsed = Ksoup.parseFile(fixturePath, "http://example.com", null)
    val titleText = parsed.head().select("title").text()
    assertEquals("OK", titleText)
}

@Test
fun noExtraNULLBytes() {
val b =
"<html><head><meta charset=\"UTF-8\"></head><body><div><u>ü</u>ü</div></body></html>".toByteArray(
Charsets.UTF8,
)
val b = "<html><head><meta charset=\"UTF-8\"></head><body><div><u>ü</u>ü</div></body></html>"
.toByteArray(Charsets.UTF8)
val doc = Ksoup.parse(b.openSync(), baseUri = "", charsetName = null)
assertFalse(doc.outerHtml().contains("\u0000"))
}

@Test
fun supportsZippedUTF8BOM() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html.gz")
val doc: Document =
Ksoup.parseFile(
Expand All @@ -243,6 +265,22 @@ class DataUtilTest {
)
}

/**
 * Stream-parses the `bomtests/bom_utf8.html.gz` fixture with no charset hint and checks the
 * resulting document: the `<title>` text must be "OK" and the body text must match the
 * fixture's sentence exactly.
 */
@Test
fun streamerSupportsZippedUTF8BOM() = runTest {
    val file = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html.gz").uniVfs
    val doc = DataUtil.streamParser(
        file = file,
        baseUri = "http://example.com",
        charset = null,
        parser = Parser.htmlParser(),
    ).complete()
    assertEquals("OK", doc.head().select("title").text())
    assertEquals(
        "There is a UTF8 BOM at the top (before the XML decl). If not read correctly, will look like a non-joining space.",
        doc.body().text(),
    )
}

@Test
fun supportsXmlCharsetDeclaration() {
val encoding = "iso-8859-1"
Expand All @@ -259,10 +297,6 @@ class DataUtilTest {

@Test
fun loadsGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("htmltests/gzip.html.gz")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
doc.toString()
Expand All @@ -272,10 +306,6 @@ class DataUtilTest {

@Test
fun loadsZGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// compressed on win, with z suffix
val input: String = TestHelper.getResourceAbsolutePath("htmltests/gzip.html.z")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
Expand All @@ -285,10 +315,6 @@ class DataUtilTest {

@Test
fun handlesFakeGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("htmltests/fake-gzip.html.gz")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
assertEquals("This is not gzipped", doc.title())
Expand All @@ -297,10 +323,6 @@ class DataUtilTest {

@Test
fun handlesChunkedInputStream() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val inputFile: String = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(inputFile.uniVfs)
// val stream = VaryingBufferReader(BufferReader(input))
Expand All @@ -313,10 +335,6 @@ class DataUtilTest {

@Test
fun handlesUnlimitedRead() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val inputFile: String = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(inputFile.uniVfs)
val byteBuffer: ByteArray = DataUtil.readToByteBuffer(input.openSync(), 0)
Expand Down
58 changes: 13 additions & 45 deletions ksoup-test/test/com/fleeksoft/ksoup/integration/ParseTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,17 @@ import kotlin.test.assertTrue
class ParseTest {
@Test
fun testHtml5Charset() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
if (Platform.isApple()) {
// apple don't support gb2312 or gbk
if (Platform.isApple() || Platform.isWindows()) {
// Apple and Windows targets don't support gb2312 or gbk
return@runTest
}
// test that <meta charset="gb2312"> works
var input = TestHelper.getResourceAbsolutePath("htmltests/meta-charset-1.html")
var doc: Document =
parseFile(
filePath = input,
baseUri = "http://example.com/",
charsetName = null,
) // gb2312, has html5 <meta charset>
var doc: Document = parseFile(
filePath = input,
baseUri = "http://example.com/",
charsetName = null,
) // gb2312, has html5 <meta charset>
if (Platform.isJS()) {
// FIXME: on js it is returning GBK
assertEquals("GBK", doc.outputSettings().charset().name.uppercase())
Expand Down Expand Up @@ -85,10 +80,6 @@ class ParseTest {

@Test
fun testLowercaseUtf8Charset() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/lowercase-charset-test.html")
val doc: Document = parseFile(filePath = input, charsetName = null)
val form = doc.select("#form").first()
Expand All @@ -98,11 +89,6 @@ class ParseTest {

@Test
fun testXwiki() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// https://github.com/jhy/jsoup/issues/1324
// this tests that when in CharacterReader we hit a buffer while marked, we preserve the mark when buffered up and can rewind
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val doc: Document =
Expand All @@ -122,21 +108,15 @@ class ParseTest {

@Test
fun testXwikiExpanded() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// https://github.com/jhy/jsoup/issues/1324
// this tests that if there is a huge illegal character reference, we can get through a buffer and rewind, and still catch that it's an invalid reference,
// and the parse tree is correct.
val parser = Parser.htmlParser()
val doc =
parse(
syncStream = TestHelper.resourceFilePathToStream("htmltests/xwiki-edit.html.gz"),
baseUri = "https://localhost/",
charsetName = "UTF-8",
parser = parser.setTrackErrors(100),
)
val doc = parse(
syncStream = TestHelper.resourceFilePathToStream("htmltests/xwiki-edit.html.gz"),
baseUri = "https://localhost/",
charsetName = "UTF-8",
parser = parser.setTrackErrors(100),
)
val errors = parser.getErrors()
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
assertEquals(0, errors.size) // not an invalid reference because did not look legit
Expand All @@ -150,10 +130,6 @@ class ParseTest {

@Test
fun testWikiExpandedFromString() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-edit.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val doc = parse(html)
Expand All @@ -165,10 +141,6 @@ class ParseTest {

@Test
fun testWikiFromString() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val doc = parse(html)
Expand All @@ -180,10 +152,6 @@ class ParseTest {

@Test
fun testFileParseNoCharsetMethod() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val file = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val doc: Document = parseFile(file)
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
Expand Down
Loading

0 comments on commit 579adab

Please sign in to comment.