Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix byte array read size and Bump versions #91

Merged
merged 6 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
[versions]
agp = "8.5.2"
kotlin = "2.0.20"
kotlin = "2.0.21"
compileSdk = "34"
minSdk = "21"
libraryVersion = "0.1.9"
ktor = "3.0.0-rc-1"
ktor = "3.0.0"
ktor2 = "2.3.12"
coroutines = "1.9.0"
kotlinxDatetime = "0.6.1"
kotlinx-io = "0.5.4"
okio = "3.9.0"
okio = "3.9.1"
dokka = "1.9.20"
kotlinx-benchmark = "0.4.12"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,17 @@ internal class SourceReaderByteArray(bytes: ByteArray) : SourceReader {

/**
 * Reads up to [length] bytes from this reader into [bytes], starting at index [offset].
 *
 * Copying stops as soon as [length] bytes have been written or the reader is exhausted.
 *
 * @param bytes destination buffer.
 * @param offset index in [bytes] at which the first byte is written.
 * @param length maximum number of bytes to read.
 * @return the number of bytes actually read, or -1 when nothing was read because the
 * reader is exhausted.
 */
override fun read(bytes: ByteArray, offset: Int, length: Int): Int {
    val startPosition = currentPosition
    // The write index starts at offset, so the bound must be offset + length; comparing the
    // index against length alone under-reads (or reads nothing) whenever offset > 0.
    val endIndex = offset + length
    var writeIndex = offset
    while (!exhausted() && writeIndex < endIndex) {
        bytes[writeIndex] = source[currentPosition++]
        writeIndex++
    }
    // Report a byte count (not a buffer index), and signal end-of-input with -1.
    val totalRead = currentPosition - startPosition
    return if (totalRead == 0 && exhausted()) -1 else totalRead
}

override fun readAllBytes(): ByteArray {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public suspend fun Ksoup.parseGetRequest(
val httpResponse = NetworkHelperKtor.instance.get(url, httpRequestBuilder = httpRequestBuilder)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}

/**
Expand All @@ -52,7 +52,7 @@ public suspend fun Ksoup.parseSubmitRequest(
)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}

/**
Expand All @@ -77,5 +77,5 @@ public suspend fun Ksoup.parsePostRequest(
)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ package com.fleeksoft.ksoup.network
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.io.from
import io.ktor.client.statement.*
import io.ktor.util.*

/**
 * Wraps this response's body in a [SourceReader].
 *
 * The body is decoded to text with `bodyAsText()` and then encoded back to bytes with
 * `encodeToByteArray()` (UTF-8 by default), so the reader is backed by the re-encoded text
 * rather than by the raw response bytes.
 *
 * @return a [SourceReader] over the re-encoded response body.
 */
suspend fun HttpResponse.asSourceReader(): SourceReader =
    SourceReader.from(this.bodyAsText().encodeToByteArray())
53 changes: 53 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class MetadataTest {
<meta name="description" content="Test Description">
<link rel="canonical" href="https://example.com">
<link rel="icon" href="/favicon.ico">
<link href="/pikacon-32x32.png" rel="shortcut icon" type="image/png">
</head>
</html>
""".trimIndent()
Expand Down Expand Up @@ -61,6 +62,58 @@ class MetadataTest {
assertEquals("Test Description", metaData.description)
assertEquals("https://example.com", metaData.canonical)
assertEquals("https://example.com/favicon.ico", metaData.favicon)
assertEquals("https://example.com/pikacon-32x32.png", metaData.shortcutIcon)
}


// Parses a larger, real-world style <head> and checks the title, og:title, og:image and
// shortcut-icon values returned by Ksoup.parseMetaData.
// NOTE(review): no @Test annotation is visible above this function, so the test runner
// probably never executes it - confirm, and annotate it if it is meant to run.
// NOTE(review): despite the "FromReader" name, the String overload of parseMetaData is
// called here; no SourceReader is involved.
// NOTE(review): baseUri ends with "/" and the first "shortcut icon" href starts with "/";
// parseMetaDataInternal joins them with plain concatenation, so the single-slash URL
// expected by the last assertion looks unreachable - verify.
fun testParseMetaDataFromReader() {
val html = """
<html>
<head>
<link rel="profile" href="http://gmpg.org/xfn/11">
<link rel="alternate" href="https://animepahe.ru" hreflang="en-us">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="theme-color" content="#373a3c"><!-- Chrome, Firefox OS, Opera and Vivaldi -->
<meta name="msapplication-navbutton-color" content="#373a3c"><!-- Windows Phone -->
<meta name="apple-mobile-web-app-status-bar-style" content="#373a3c"><!-- iOS Safari -->
<meta http-equiv="x-dns-prefetch-control" content="on">
<link rel="preconnect" href="//i.animepahe.ru">
<link rel="preload" href="/app/fonts/QldONTRRphEb_-V7LB6xTA.woff2" as="font" type="font/woff2" crossorigin>
<link rel="preload" href="/app/css/bootstrap.min.css" as="style">
<link rel="preload" href="/app/css/fork-awesome.min.css" as="style">
<link rel="preload" href="/app/css/style.css" as="style">
<link rel="preload" href="/app/js/vendor/bootstrap.bundle.min.js" as="script">
<link rel="preload" href="/app/js/core.js" as="script">
<meta name="msapplication-TileImage" content="https://animepahe.ru/animepahe-270x270.png">
<title>animepahe :: okay-ish anime website</title>
<meta name="description" content="Watch or download anime shows in HD 720p/1080p.">
<meta name="keywords" content="Anime,Pahe,Mini,720p,HD,mp4,English,Subtitle,Hardsub">
<meta name="robots" content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1">
<meta property="og:site_name" content="animepahe">
<meta property="og:locale" content="en_US">
<meta property="og:image" content="https://animepahe.ru/animepahe-270x270.png">
<meta property="og:url" content="https://animepahe.ru">
<meta property="og:type" content="website">
<meta property="og:title" content="cloud anime encoding">
<meta property="og:description" content="Watch or download anime shows in HD 720p/1080p.">
<meta name="author" content="animepahe">
<link href="/apple-touch-icon.png" rel="apple-touch-icon-precomposed" type="image/png">
<link href="/apple-touch-icon.png" rel="shortcut icon">
<link href="/pikacon-32x32.png" rel="shortcut icon" type="image/png">
<link href="/pikacon.ico" rel="shortcut icon" type="image/x-icon">
<link rel="alternate" type="application/rss+xml" title="RSS 2.0" href="https://animepahe.ru/feed">
<link rel="stylesheet" href="/app/css/bootstrap.min.css">
<link rel="stylesheet" href="/app/css/fork-awesome.min.css">
<link rel="stylesheet" href="/app/css/style.css">
</head>
</html>
""".trimIndent()

val metaData = Ksoup.parseMetaData(html, "https://animepahe.ru/")
assertEquals("animepahe :: okay-ish anime website", metaData.title)
assertEquals("cloud anime encoding", metaData.ogTitle)
assertEquals("https://animepahe.ru/animepahe-270x270.png", metaData.ogImage)
assertEquals("https://animepahe.ru/apple-touch-icon.png", metaData.shortcutIcon)
}

}
1 change: 1 addition & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class DocumentTest {
assertEquals("", doc.html())
val body = doc.body()
assertEquals("body", body.tagName())
assertNull(doc.headOrNull())
val head = doc.head()
assertEquals("head", head.tagName())
assertEquals(
Expand Down
26 changes: 18 additions & 8 deletions ksoup/src/com/fleeksoft/ksoup/Ksoup.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package com.fleeksoft.ksoup

import com.fleeksoft.ksoup.helper.DataUtil
import com.fleeksoft.ksoup.io.Charset
import com.fleeksoft.ksoup.io.FileSource
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.model.MetaData
import com.fleeksoft.ksoup.nodes.Document
import com.fleeksoft.ksoup.nodes.Element
import com.fleeksoft.ksoup.parser.Parser
import com.fleeksoft.ksoup.parser.StreamParser
import com.fleeksoft.ksoup.ported.toSourceFile
import com.fleeksoft.ksoup.safety.Cleaner
import com.fleeksoft.ksoup.safety.Safelist
Expand Down Expand Up @@ -190,9 +190,12 @@ public object Ksoup {
}

/**
 * Extracts page metadata from [element].
 *
 * When [element] is a [Document], lookups are scoped to its `head` element if
 * `headOrNull()` finds one; otherwise the document itself is searched. Any other element
 * is searched directly.
 *
 * @param element the document or element to search; its `baseUri()` is passed on as the
 * base URI.
 * @return the [MetaData] assembled by `parseMetaDataInternal`.
 */
fun parseMetaData(element: Element): MetaData {
    // Prefer the existing <head>, but never force one into existence just to query it.
    val searchRoot: Element = (element as? Document)?.headOrNull() ?: element
    val title = searchRoot.selectFirst("title")?.text()
    return parseMetaDataInternal(baseUri = element.baseUri(), title = title) { query ->
        searchRoot.selectFirst(query)
    }
}

Expand All @@ -201,7 +204,7 @@ public object Ksoup {
baseUri: String = "",
interceptor: ((head: Element, metaData: MetaData) -> Unit)? = null
): MetaData {
val head = parse(html = html, baseUri = baseUri).head()
val head = parse(html = html, baseUri = baseUri).let { doc -> doc.headOrNull() ?: doc }

val title = head.selectFirst("title")?.text()
return parseMetaDataInternal(baseUri = baseUri, title = title) { query ->
Expand All @@ -214,9 +217,10 @@ public object Ksoup {
fun parseMetaData(
sourceReader: SourceReader,
baseUri: String = "",
interceptor: ((headStream: StreamParser, metaData: MetaData) -> Unit)? = null
charset: Charset? = null,
interceptor: ((head: Element, metaData: MetaData) -> Unit)? = null
): MetaData {
val head = DataUtil.streamParser(sourceReader = sourceReader, baseUri = baseUri, null, Parser.htmlParser())
val head = parse(sourceReader = sourceReader, baseUri = baseUri, charsetName = charset?.name).let { doc -> doc.headOrNull() ?: doc }
val title = head.selectFirst("title")?.text()
return parseMetaDataInternal(baseUri = baseUri, title = title) { query ->
head.selectFirst(query)
Expand Down Expand Up @@ -251,10 +255,15 @@ public object Ksoup {

// Fetch favicon
var faviconTag = selectFirst("link[rel~=icon]")?.attr("href")
if (faviconTag != null && !faviconTag.startsWith("http") && baseUri.isNotEmpty()) {
if (faviconTag != null && !faviconTag.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) {
faviconTag = baseUri + faviconTag
}

var shortcutIcon = selectFirst("link[rel~=shortcut icon]")?.attr("href")
if (shortcutIcon != null && !shortcutIcon.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) {
shortcutIcon = baseUri + shortcutIcon
}

// Create a MetaData object
return MetaData(
ogTitle = ogTitle,
Expand All @@ -273,7 +282,8 @@ public object Ksoup {
canonical = canonicalTag,
htmlTitle = title,
author = author,
favicon = faviconTag
favicon = faviconTag,
shortcutIcon = shortcutIcon,
)
}
}
3 changes: 2 additions & 1 deletion ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ data class MetaData(
val canonical: String? = null,
val htmlTitle: String? = null,
val author: String? = null,
val favicon: String? = null
val favicon: String? = null,
val shortcutIcon: String? = null
)
18 changes: 18 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ public class Document(private val namespace: String, private val location: Strin
return html.prependElement("head")
}

/**
 * Get this document's `head` element, if it has one.
 *
 * Only the direct element children of the `html` element are scanned; this function does
 * not add a `head` element when none is found.
 *
 * NOTE(review): the `html` element is obtained through `htmlEl()`, which may itself create
 * that element when it is missing - confirm.
 *
 * @return the `head` element, or `null` if the `html` element has no `head` child.
 */
public fun headOrNull(): Element? =
    generateSequence(htmlEl().firstElementChild()) { it.nextElementSibling() }
        .firstOrNull { it.nameIs("head") }

/**
Get this document's {@code <body>} or {@code <frameset>} element.
<p>
Expand Down
2 changes: 1 addition & 1 deletion ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ public open class Element : Node {
): String {
var el: Element? = start
while (el != null) {
if (el.attributes != null && el.attributes!!.hasKey(key)) return el.attributes!![key]
if (el.attributes?.hasKey(key) == true) return el.attributes!![key]
el = el.parent()
}
return ""
Expand Down
Loading