diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 35f4f02..5097310 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,15 +1,15 @@ [versions] agp = "8.5.2" -kotlin = "2.0.20" +kotlin = "2.0.21" compileSdk = "34" minSdk = "21" libraryVersion = "0.1.9" -ktor = "3.0.0-rc-1" +ktor = "3.0.0" ktor2 = "2.3.12" coroutines = "1.9.0" kotlinxDatetime = "0.6.1" kotlinx-io = "0.5.4" -okio = "3.9.0" +okio = "3.9.1" dokka = "1.9.20" kotlinx-benchmark = "0.4.12" diff --git a/ksoup-engine-common/src/com/fleeksoft/ksoup/io/SourceReaderByteArray.kt b/ksoup-engine-common/src/com/fleeksoft/ksoup/io/SourceReaderByteArray.kt index 9aafca5..6b28bf4 100644 --- a/ksoup-engine-common/src/com/fleeksoft/ksoup/io/SourceReaderByteArray.kt +++ b/ksoup-engine-common/src/com/fleeksoft/ksoup/io/SourceReaderByteArray.kt @@ -37,11 +37,17 @@ internal class SourceReaderByteArray(bytes: ByteArray) : SourceReader { override fun read(bytes: ByteArray, offset: Int, length: Int): Int { var i = offset + var pos = currentPosition while (exhausted().not() && i < length) { bytes[i] = source[currentPosition++] i++ } - return i + val totalRead = currentPosition - pos + return if (totalRead == 0 && exhausted()) { + -1 + } else { + totalRead + } } override fun readAllBytes(): ByteArray { diff --git a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/KsoupNetwork.kt b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/KsoupNetwork.kt index 8ff0165..bb6b537 100644 --- a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/KsoupNetwork.kt +++ b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/KsoupNetwork.kt @@ -25,7 +25,7 @@ public suspend fun Ksoup.parseGetRequest( val httpResponse = NetworkHelperKtor.instance.get(url, httpRequestBuilder = httpRequestBuilder) // url can be changed after redirection val finalUrl = httpResponse.request.url.toString() - return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl) + return parse(html 
= httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl) } /** @@ -52,7 +52,7 @@ public suspend fun Ksoup.parseSubmitRequest( ) // url can be changed after redirection val finalUrl = httpResponse.request.url.toString() - return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl) + return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl) } /** @@ -77,5 +77,5 @@ public suspend fun Ksoup.parsePostRequest( ) // url can be changed after redirection val finalUrl = httpResponse.request.url.toString() - return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl) + return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl) } diff --git a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt index f28297e..0a05bda 100644 --- a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt +++ b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt @@ -3,6 +3,5 @@ package com.fleeksoft.ksoup.network import com.fleeksoft.ksoup.io.SourceReader import com.fleeksoft.ksoup.io.from import io.ktor.client.statement.* -import io.ktor.util.* -suspend fun HttpResponse.asSourceReader() = SourceReader.from(this.bodyAsChannel().toByteArray()) +suspend fun HttpResponse.asSourceReader() = SourceReader.from(this.bodyAsText().encodeToByteArray()) diff --git a/ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt b/ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt index 84e8bb6..e9b7c64 100644 --- a/ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt +++ b/ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt @@ -21,6 +21,7 @@ class MetadataTest { + """.trimIndent() @@ -61,6 +62,58 @@ class MetadataTest { assertEquals("Test Description", metaData.description) assertEquals("https://example.com", metaData.canonical) 
assertEquals("https://example.com/favicon.ico", metaData.favicon) + assertEquals("https://example.com/pikacon-32x32.png", metaData.shortcutIcon) + } + + + fun testParseMetaDataFromReader() { + val html = """ + + + + + + + + + + + + + + + + + + animepahe :: okay-ish anime website + + + + + + + + + + + + + + + + + + + + + + """.trimIndent() + + val metaData = Ksoup.parseMetaData(html, "https://animepahe.ru/") + assertEquals("animepahe :: okay-ish anime website", metaData.title) + assertEquals("cloud anime encoding", metaData.ogTitle) + assertEquals("https://animepahe.ru/animepahe-270x270.png", metaData.ogImage) + assertEquals("https://animepahe.ru/apple-touch-icon.png", metaData.shortcutIcon) } } \ No newline at end of file diff --git a/ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt b/ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt index d514e32..913e308 100644 --- a/ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt +++ b/ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt @@ -79,6 +79,7 @@ class DocumentTest { assertEquals("", doc.html()) val body = doc.body() assertEquals("body", body.tagName()) + assertNull(doc.headOrNull()) val head = doc.head() assertEquals("head", head.tagName()) assertEquals( diff --git a/ksoup/src/com/fleeksoft/ksoup/Ksoup.kt b/ksoup/src/com/fleeksoft/ksoup/Ksoup.kt index ffad98c..61515da 100644 --- a/ksoup/src/com/fleeksoft/ksoup/Ksoup.kt +++ b/ksoup/src/com/fleeksoft/ksoup/Ksoup.kt @@ -1,13 +1,13 @@ package com.fleeksoft.ksoup import com.fleeksoft.ksoup.helper.DataUtil +import com.fleeksoft.ksoup.io.Charset import com.fleeksoft.ksoup.io.FileSource import com.fleeksoft.ksoup.io.SourceReader import com.fleeksoft.ksoup.model.MetaData import com.fleeksoft.ksoup.nodes.Document import com.fleeksoft.ksoup.nodes.Element import com.fleeksoft.ksoup.parser.Parser -import com.fleeksoft.ksoup.parser.StreamParser import com.fleeksoft.ksoup.ported.toSourceFile import com.fleeksoft.ksoup.safety.Cleaner import 
com.fleeksoft.ksoup.safety.Safelist @@ -190,9 +190,12 @@ public object Ksoup { } fun parseMetaData(element: Element): MetaData { - val title = element.selectFirst("title")?.text() + val el = if (element is Document) { + element.headOrNull() ?: element + } else element + val title = el.selectFirst("title")?.text() return parseMetaDataInternal(baseUri = element.baseUri(), title = title) { query -> - element.selectFirst(query) + el.selectFirst(query) } } @@ -201,7 +204,7 @@ public object Ksoup { baseUri: String = "", interceptor: ((head: Element, metaData: MetaData) -> Unit)? = null ): MetaData { - val head = parse(html = html, baseUri = baseUri).head() + val head = parse(html = html, baseUri = baseUri).let { doc -> doc.headOrNull() ?: doc } val title = head.selectFirst("title")?.text() return parseMetaDataInternal(baseUri = baseUri, title = title) { query -> @@ -214,9 +217,10 @@ public object Ksoup { fun parseMetaData( sourceReader: SourceReader, baseUri: String = "", - interceptor: ((headStream: StreamParser, metaData: MetaData) -> Unit)? = null + charset: Charset? = null, + interceptor: ((head: Element, metaData: MetaData) -> Unit)? 
= null ): MetaData { - val head = DataUtil.streamParser(sourceReader = sourceReader, baseUri = baseUri, null, Parser.htmlParser()) + val head = parse(sourceReader = sourceReader, baseUri = baseUri, charsetName = charset?.name).let { doc -> doc.headOrNull() ?: doc } val title = head.selectFirst("title")?.text() return parseMetaDataInternal(baseUri = baseUri, title = title) { query -> head.selectFirst(query) @@ -251,10 +255,15 @@ public object Ksoup { // Fetch favicon var faviconTag = selectFirst("link[rel~=icon]")?.attr("href") - if (faviconTag != null && !faviconTag.startsWith("http") && baseUri.isNotEmpty()) { + if (faviconTag != null && !faviconTag.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) { faviconTag = baseUri + faviconTag } + var shortcutIcon = selectFirst("link[rel~=shortcut icon]")?.attr("href") + if (shortcutIcon != null && !shortcutIcon.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) { + shortcutIcon = baseUri + shortcutIcon + } + // Create a MetaData object return MetaData( ogTitle = ogTitle, @@ -273,7 +282,8 @@ public object Ksoup { canonical = canonicalTag, htmlTitle = title, author = author, - favicon = faviconTag + favicon = faviconTag, + shortcutIcon = shortcutIcon, ) } } \ No newline at end of file diff --git a/ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt b/ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt index 8350f24..ff38c85 100644 --- a/ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt +++ b/ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt @@ -17,5 +17,6 @@ data class MetaData( val canonical: String? = null, val htmlTitle: String? = null, val author: String? = null, - val favicon: String? = null + val favicon: String? = null, + val shortcutIcon: String? 
= null ) \ No newline at end of file diff --git a/ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt b/ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt index efebabd..3184749 100644 --- a/ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt +++ b/ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt @@ -98,6 +98,24 @@ public class Document(private val namespace: String, private val location: Strin return html.prependElement("head") } + /** + Get this document's {@code head} element. +

+ Unlike {@code head()}, this never creates a missing {@code head} element; {@code null} is returned instead. The {@code html} element is still + looked up through {@code htmlEl()}, so any structure that call creates is a side effect here too. + + @return the {@code head} element, or {@code null} if the {@code html} element has no {@code head} child. + */ + public fun headOrNull(): Element? { + val html: Element = htmlEl() + var el: Element? = html.firstElementChild() + while (el != null) { + if (el.nameIs("head")) return el + el = el.nextElementSibling() + } + return null + } + /** Get this document's {@code } or {@code } element.

diff --git a/ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt b/ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt index d79fa0b..3f731eb 100644 --- a/ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt +++ b/ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt @@ -1840,7 +1840,7 @@ public open class Element : Node { ): String { var el: Element? = start while (el != null) { - if (el.attributes != null && el.attributes!!.hasKey(key)) return el.attributes!![key] + if (el.attributes?.hasKey(key) == true) return el.attributes!![key] el = el.parent() } return ""