Skip to content

Commit

Permalink
Merge pull request #42 from fleeksoft/develop
Browse files Browse the repository at this point in the history
fix test
  • Loading branch information
itboy87 authored Aug 6, 2024
2 parents 0893573 + aa06a83 commit 0086bd2
Show file tree
Hide file tree
Showing 22 changed files with 327 additions and 386 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
**Ksoup** is a Kotlin Multiplatform library for working with real-world HTML and XML. It's a port of the renowned Java library, **jsoup**, and offers an easy-to-use API for URL fetching, data parsing, extraction, and manipulation using DOM and CSS selectors.

[![Kotlin](https://img.shields.io/badge/Kotlin-2.0.0-blue.svg?style=flat&logo=kotlin)](https://kotlinlang.org)
[![Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-green.svg)](https://opensource.org/licenses/Apache-2.0)
[![Maven Central](https://img.shields.io/maven-central/v/com.fleeksoft.ksoup/ksoup.svg)](https://central.sonatype.com/artifact/com.fleeksoft.ksoup/ksoup)

![badge-android](http://img.shields.io/badge/platform-android-6EDB8D.svg?style=flat)
Expand Down
5 changes: 4 additions & 1 deletion gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ dokka = "1.9.20"
korlibs = "6.0.0"
mavenPublish = "0.29.0"

stately-concurrent = "2.0.7"

[libraries]
kotlin-test = { module = "org.jetbrains.kotlin:kotlin-test", version.ref = "kotlin" }
kotlinx-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
kotlinx-coroutines-core = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
kotlinx-coroutines-test = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-test", version.ref = "coroutines" }
ktor-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor" }
ktor-client-logging = { module = "io.ktor:ktor-client-logging", version.ref = "ktor" }
Expand All @@ -34,6 +36,7 @@ kotlinx-datetime = { module = "org.jetbrains.kotlinx:kotlinx-datetime", version.
kotlinx-io = { module = "org.jetbrains.kotlinx:kotlinx-io-core", version.ref = "kotlinx-io" }
codepoints = { module = "de.cketti.unicode:kotlin-codepoints-deluxe", version.ref = "codepoints" }
korlibs-io = { module = "com.soywiz:korlibs-io", version.ref = "korlibs" }
stately-concurrent = { module = "co.touchlab:stately-concurrent-collections", version.ref = "stately-concurrent" }

[plugins]
androidLibrary = { id = "com.android.library", version.ref = "agp" }
Expand Down
4 changes: 2 additions & 2 deletions ksoup-test/test/com/fleeksoft/ksoup/TestHelper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ object TestHelper {
}

fun getResourceAbsolutePath(resourceName: String): String {
if (Platform.current == PlatformType.WASM_JS) {
return resourceName
if (Platform.current == PlatformType.WINDOWS) {
return "../../../../testResources/$resourceName"
}
return "${BuildConfig.PROJECT_ROOT}/ksoup-test/testResources/$resourceName"
}
Expand Down
57 changes: 27 additions & 30 deletions ksoup-test/test/com/fleeksoft/ksoup/helper/DataUtilTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@ package com.fleeksoft.ksoup.helper
import com.fleeksoft.ksoup.*
import com.fleeksoft.ksoup.nodes.Document
import com.fleeksoft.ksoup.parser.Parser
import korlibs.io.file.std.resourcesVfs
import korlibs.io.file.std.uniVfs
import korlibs.io.lang.Charset
import korlibs.io.lang.Charsets
import korlibs.io.lang.toByteArray
import korlibs.io.resources.resourceLocal
import korlibs.io.stream.SyncStream
import korlibs.io.stream.openSync
import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.test.runTest
import kotlin.test.*

Expand Down Expand Up @@ -71,10 +68,10 @@ class DataUtilTest {
val html = "\uFEFF<html><head><title>One</title></head><body>Two</body></html>"
val doc: Document =
DataUtil.parseInputSource(
html.openSync(),
null,
"http://foo.com/",
Parser.htmlParser(),
syncStream = html.openSync(),
baseUri = "http://foo.com/",
charsetName = null,
parser = Parser.htmlParser(),
)
assertEquals("One", doc.head().text())
assertEquals("UTF-8", doc.outputSettings().charset().name.uppercase())
Expand Down Expand Up @@ -121,10 +118,10 @@ class DataUtilTest {
val html = "<html><head><meta charset=iso-8></head><body></body></html>"
val doc: Document =
DataUtil.parseInputSource(
this.bufferByteArrayCharset(html),
null,
"http://example.com",
Parser.htmlParser(),
syncStream = this.bufferByteArrayCharset(html),
baseUri = "http://example.com",
charsetName = null,
parser = Parser.htmlParser(),
)
val expected = """<html>
<head>
Expand All @@ -143,15 +140,15 @@ class DataUtilTest {
}
val html =
"<html><head>" +
"<meta http-equiv=\"Content-Type\" content=\"text/html\">" +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=euc-kr\">" +
"</head><body>한국어</body></html>"
"<meta http-equiv=\"Content-Type\" content=\"text/html\">" +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=euc-kr\">" +
"</head><body>한국어</body></html>"
val doc: Document =
DataUtil.parseInputSource(
bufferByteArrayCharset(html, "euc-kr"),
null,
"http://example.com",
Parser.htmlParser(),
syncStream = bufferByteArrayCharset(data = html, charset = "euc-kr"),
baseUri = "http://example.com",
charsetName = null,
parser = Parser.htmlParser(),
)
assertEquals("한국어", doc.body().text())
}
Expand All @@ -160,15 +157,15 @@ class DataUtilTest {
fun firstMetaElementWithCharsetShouldBeUsedForDecoding() {
val html =
"<html><head>" +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=koi8-u\">" +
"</head><body>Übergrößenträger</body></html>"
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=koi8-u\">" +
"</head><body>Übergrößenträger</body></html>"
val docByteArrayCharset: Document =
DataUtil.parseInputSource(
bufferByteArrayCharset(html, "iso-8859-1"),
null,
"http://example.com",
Parser.htmlParser(),
syncStream = bufferByteArrayCharset(data = html, charset = "iso-8859-1"),
baseUri = "http://example.com",
charsetName = null,
parser = Parser.htmlParser(),
)

assertEquals("Übergrößenträger", docByteArrayCharset.body().text())
Expand Down Expand Up @@ -251,17 +248,17 @@ class DataUtilTest {
val encoding = "iso-8859-1"
val soup =
(
"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" +
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" +
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\">Hellö Wörld!</html>"
)
"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" +
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" +
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\">Hellö Wörld!</html>"
)
.toByteArray(Charset.forName(encoding)).openSync()
val doc: Document = Ksoup.parse(soup, baseUri = "", charsetName = null)
assertEquals("Hellö Wörld!", doc.body().text())
}

@Test
fun lLoadsGzipFile() = runTest {
fun loadsGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
Expand Down
34 changes: 18 additions & 16 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package com.fleeksoft.ksoup.nodes

import com.fleeksoft.ksoup.*
import com.fleeksoft.ksoup.Ksoup
import com.fleeksoft.ksoup.Platform
import com.fleeksoft.ksoup.TestHelper
import com.fleeksoft.ksoup.isJS
import com.fleeksoft.ksoup.parser.ParseSettings
import com.fleeksoft.ksoup.parser.Parser
import korlibs.io.lang.Charset
Expand Down Expand Up @@ -154,12 +157,11 @@ class DocumentTest {
}
// tests location vs base href
val `in`: String = TestHelper.getResourceAbsolutePath("htmltests/basehref.html")
val doc: Document =
Ksoup.parseFile(
filePath = `in`,
baseUri = "http://example.com/",
charsetName = "UTF-8",
)
val doc: Document = Ksoup.parseFile(
filePath = `in`,
baseUri = "http://example.com/",
charsetName = "UTF-8",
)
val location = doc.location()
val baseUri = doc.baseUri()
assertEquals("http://example.com/", location)
Expand Down Expand Up @@ -477,15 +479,15 @@ class DocumentTest {
return
}
val input = (
"<html>" +
"<head>" +
"<meta http-equiv=\"content-type\" content=\"text/html; charset=Shift_JIS\" />" +
"</head>" +
"<body>" +
"before&nbsp;after" +
"</body>" +
"</html>"
)
"<html>" +
"<head>" +
"<meta http-equiv=\"content-type\" content=\"text/html; charset=Shift_JIS\" />" +
"</head>" +
"<body>" +
"before&nbsp;after" +
"</body>" +
"</html>"
)
val inputStream = input.encodeToByteArray().openSync()
val doc: Document = Ksoup.parse(syncStream = inputStream, baseUri = "http://example.com", charsetName = null)
doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml)
Expand Down
16 changes: 8 additions & 8 deletions ksoup-test/test/com/fleeksoft/ksoup/safety/CleanerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ class CleanerTest {
@Test
fun resolvesRelativeLinks() {
val html = "<a href='/foo'>Link</a><img src='/bar'>"
val clean = Ksoup.clean(html, "http://example.com/", Safelist.basicWithImages())
val clean = Ksoup.clean(bodyHtml = html, safelist = Safelist.basicWithImages(), baseUri = "http://example.com/")
assertEquals(
"<a href=\"http://example.com/foo\" rel=\"nofollow\">Link</a><img src=\"http://example.com/bar\">",
clean,
Expand All @@ -254,7 +254,7 @@ class CleanerTest {
@Test
fun preservesRelativeLinksIfConfigured() {
val html = "<a href='/foo'>Link</a><img src='/bar'> <img src='javascript:alert()'>"
val clean = Ksoup.clean(html, "http://example.com/", Safelist.basicWithImages().preserveRelativeLinks(true))
val clean = Ksoup.clean(bodyHtml = html, safelist = Safelist.basicWithImages().preserveRelativeLinks(true), baseUri = "http://example.com/")
assertEquals("<a href=\"/foo\" rel=\"nofollow\">Link</a><img src=\"/bar\"> <img>", clean)
}

Expand All @@ -269,18 +269,18 @@ class CleanerTest {
fun dropsConcealedJavascriptProtocolWhenRelativesLinksEnabled() {
val safelist = Safelist.basic().preserveRelativeLinks(true)
val html = "<a href=\"&#0013;ja&Tab;va&Tab;script&#0010;:alert(1)\">Link</a>"
val clean = Ksoup.clean(html, "https://", safelist)
val clean = Ksoup.clean(bodyHtml = html, safelist = safelist, baseUri = "https://")
assertEquals("<a rel=\"nofollow\">Link</a>", clean)
val colon = "<a href=\"ja&Tab;va&Tab;script&colon;alert(1)\">Link</a>"
val cleanColon = Ksoup.clean(colon, "https://", safelist)
val cleanColon = Ksoup.clean(bodyHtml = colon, safelist = safelist, baseUri = "https://")
assertEquals("<a rel=\"nofollow\">Link</a>", cleanColon)
}

@Test
fun dropsConcealedJavascriptProtocolWhenRelativesLinksDisabled() {
val safelist = Safelist.basic().preserveRelativeLinks(false)
val html = "<a href=\"ja&Tab;vas&#0013;cript:alert(1)\">Link</a>"
val clean = Ksoup.clean(html, "https://", safelist)
val clean = Ksoup.clean(bodyHtml = html, safelist = safelist, baseUri = "https://")
assertEquals("<a rel=\"nofollow\">Link</a>", clean)
}

Expand Down Expand Up @@ -324,8 +324,8 @@ class CleanerTest {
os.escapeMode(Entities.EscapeMode.extended)
os.charset("ISO-8859-1")
val html = "<div><p>&bernou;</p></div>"
val customOut = Ksoup.clean(html, "http://foo.com/", Safelist.relaxed(), os)
val defaultOut = Ksoup.clean(html, "http://foo.com/", Safelist.relaxed())
val customOut = Ksoup.clean(bodyHtml = html, safelist = Safelist.relaxed(), baseUri = "http://foo.com/", outputSettings = os)
val defaultOut = Ksoup.clean(bodyHtml = html, safelist = Safelist.relaxed(), baseUri = "http://foo.com/")
assertNotSame(defaultOut, customOut)
assertEquals("<div><p>&Bscr;</p></div>", customOut) // entities now prefers shorted names if aliased
assertEquals(
Expand All @@ -336,7 +336,7 @@ class CleanerTest {
)
os.charset("ISO-8859-1")
os.escapeMode(Entities.EscapeMode.base)
val customOut2 = Ksoup.clean(html, "http://foo.com/", Safelist.relaxed(), os)
val customOut2 = Ksoup.clean(bodyHtml = html, safelist = Safelist.relaxed(), baseUri = "http://foo.com/", outputSettings = os)
assertEquals("<div><p>&#x212c;</p></div>", customOut2)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,43 @@ import korlibs.io.stream.openSync
import korlibs.io.stream.readAll
import kotlinx.coroutines.test.runTest
import java.io.*
import java.nio.file.Path
import java.util.zip.GZIPInputStream
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue


class DataUtilTestJvm {
/** Wraps the UTF-8 encoded bytes of [data] in an in-memory [ByteArrayInputStream]. */
private fun inputStream(data: String): ByteArrayInputStream =
    data.encodeToByteArray().inputStream()

/**
 * Parses the `htmltests/gzip.html.gz` test resource through [Ksoup.parsePath] and checks
 * that the expected title and first paragraph text come back.
 */
@Test
fun loadsGzipPath() = runTest {
    // Relative resource name (no leading slash), consistent with the other Path-based
    // tests in this class; a leading slash would produce a doubled separator once the
    // helper appends the name to the test-resources directory.
    val gzipPath: Path = ParserHelper.getPath("htmltests/gzip.html.gz")
    val doc: Document = Ksoup.parsePath(gzipPath)
    assertEquals("Gzip test", doc.title())
    // Safe call instead of `!!`: a missing <p> fails as an assertion mismatch, not an NPE.
    assertEquals("This is a gzipped HTML file.", doc.selectFirst("p")?.text())
}

/**
 * Parses the `htmltests/gzip.html.z` test resource through [Ksoup.parsePath] and checks
 * that the expected title and first paragraph text come back.
 */
@Test
fun loadsZGzipPath() = runTest {
    // The fixture was compressed on Windows and carries a `.z` suffix instead of `.gz`.
    val zPath: Path = ParserHelper.getPath("htmltests/gzip.html.z")
    val doc: Document = Ksoup.parsePath(zPath)
    assertEquals("Gzip test", doc.title())
    // Safe call instead of `!!`: a missing <p> fails as an assertion mismatch, not an NPE.
    assertEquals("This is a gzipped HTML file.", doc.selectFirst("p")?.text())
}

/**
 * Parses the `htmltests/fake-gzip.html.gz` test resource through [Ksoup.parsePath] and
 * checks that its title and first paragraph are still readable.
 */
@Test
fun handlesFakeGzipPath() = runTest {
    // NOTE(review): judging by the expected title, the fixture has a `.gz` name but is
    // presumably not actually compressed — confirm against the resource file.
    val fakeGzipPath: Path = ParserHelper.getPath("htmltests/fake-gzip.html.gz")
    val doc: Document = Ksoup.parsePath(fakeGzipPath)
    assertEquals("This is not gzipped", doc.title())
    // Safe call instead of `!!`: a missing <p> fails as an assertion mismatch, not an NPE.
    assertEquals("And should still be readable.", doc.selectFirst("p")?.text())
}

@Test
fun testParseSequenceBufferReader() = runTest {
// https://github.com/jhy/jsoup/pull/1671
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.fleeksoft.ksoup

import java.nio.file.Path
import java.nio.file.Paths


/**
 * JVM-side test helper that hands out test resources as [Path] instances.
 */
object ParserHelper {
    /**
     * Resolves [resourceName] with [TestHelper.getResourceAbsolutePath] and converts the
     * resulting string into a [Path].
     */
    fun getPath(resourceName: String): Path =
        Paths.get(TestHelper.getResourceAbsolutePath(resourceName))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.fleeksoft.ksoup.select

import com.fleeksoft.ksoup.Ksoup
import com.fleeksoft.ksoup.nodes.Document
import kotlin.test.Test
import kotlin.test.assertEquals

/**
 * Thread-based integration test that evaluates one pre-parsed selector from several
 * threads at the same time.
 */
class SelectorIT {
    // TODO: create a coroutine-based version of this test in the common source set
    /**
     * Runs the shared `div:has(p)` evaluator concurrently from [Thread]s, each working on
     * its own parsed document, and asserts that no worker thread ended with an exception.
     */
    @Test
    fun multiThreadHas() {
        val html = "<div id=1></div><div id=2><p>One</p><p>Two</p>"
        // Parsed once and shared by every worker thread.
        val eval = QueryParser.parse("div:has(p)")

        val numThreads = 20
        val numThreadLoops = 5

        val catcher = ThreadCatcher()

        val threads: List<Thread> = List(numThreads) { threadNum ->
            val thread = Thread {
                val doc: Document = Ksoup.parse(html)
                repeat(numThreadLoops) {
                    val els: Elements = doc.select(eval)
                    assertEquals(1, els.size)
                    assertEquals("2", els[0].id())
                }
            }
            thread.setName("Runner-$threadNum")
            // Install the handler BEFORE starting the thread: otherwise a worker that fails
            // right away could die before the handler is set and its failure would go uncounted.
            thread.setUncaughtExceptionHandler(catcher)
            thread.start()
            thread
        }

        // Wait for every worker to finish before inspecting the failure count.
        threads.forEach { it.join() }

        assertEquals(0, catcher.exceptionCount.get())
    }

    /**
     * Uncaught-exception handler that prints each failure and counts how many it has seen.
     */
    internal class ThreadCatcher : Thread.UncaughtExceptionHandler {
        var exceptionCount: java.util.concurrent.atomic.AtomicInteger = java.util.concurrent.atomic.AtomicInteger()

        override fun uncaughtException(t: Thread, e: Throwable) {
            e.printStackTrace()
            exceptionCount.incrementAndGet()
        }
    }
}
1 change: 1 addition & 0 deletions ksoup/module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ repositories:

dependencies:
- $libs.codepoints
- $libs.stately.concurrent
- $libs.korlibs.io: exported
Loading

0 comments on commit 0086bd2

Please sign in to comment.