-
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Get images working again
* Re-add custom extractors
- Loading branch information
Showing
16 changed files
with
179 additions
and
25 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
capy/src/main/java/com/jocmp/capy/articles/HtmlPostProcessor.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package com.jocmp.capy.articles | ||
|
||
import org.jsoup.nodes.Document | ||
|
||
/**
 * Groups the clean-up passes that are applied to a jsoup [Document].
 *
 * NOTE(review): `cleanStyles`, `cleanLinks`, `removeImages` and `wrapTables` are declared
 * outside this view; what each one does is only implied by its name — confirm before
 * relying on it.
 */
object HtmlPostProcessor { | ||
/**
 * Invokes the clean-up passes on [document] in a fixed order: `cleanStyles`, `cleanLinks`,
 * optionally `removeImages`, and finally `wrapTables`.
 *
 * Nothing is returned, so the passes presumably modify [document] in place — TODO confirm
 * against the helper implementations.
 *
 * @param document the document handed to every pass.
 * @param hideImages when `true`, `removeImages` is called as well; when `false` it is skipped.
 */
fun clean(document: Document, hideImages: Boolean) { | ||
cleanStyles(document) | ||
cleanLinks(document) | ||
if (hideImages) { | ||
removeImages(document) | ||
} | ||
wrapTables(document) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,33 @@ | ||
package com.jocmp.capy.articles | ||
|
||
import com.jocmp.capy.Article | ||
import net.dankito.readability4j.Readability4J | ||
import org.json.JSONObject | ||
|
||
fun parseHtml(article: Article, html: String): String { | ||
try { | ||
val uri = (article.feedURL ?: article.url).toString() | ||
val readability4J = Readability4J(uri, html) | ||
val content = readability4J.parse().articleContent ?: return "" | ||
return """ | ||
<script> | ||
(async () => { | ||
let downloaded = ${JSONObject(mapOf("value" to html))}; | ||
content.getElementsByClass("readability-styled").forEach { element -> | ||
element.append(" ") | ||
} | ||
Mercury.parse("${article.url?.toString()}", { html: downloaded.value }).then(article => { | ||
let extracted = document.createElement("div"); | ||
return content.html() | ||
} catch (ex: Throwable) { | ||
return "" | ||
} | ||
extracted.id = "article-body-content" | ||
extracted.innerHTML = article.content; | ||
let shouldAddImage = article.lead_image_url && | ||
![...extracted.querySelectorAll("img")].some(img => img.src.includes(article.lead_image_url)); | ||
if (shouldAddImage) { | ||
let leadImage = document.createElement("img"); | ||
leadImage.src = article.lead_image_url; | ||
extracted.prepend(leadImage); | ||
} | ||
let content = document.getElementById("article-body-content"); | ||
content.replaceWith(extracted); | ||
}); | ||
})(); | ||
</script> | ||
""".trimIndent() | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
```kotlin | ||
webView.evaluateJavascript( | ||
"""(function test() { | ||
return "hello"; | ||
})(); | ||
""".trimIndent()) { | ||
it | ||
} | ||
``` | ||
|
||
1. Load placeholder text | ||
2. Fetch full content | ||
3. evaluateJavascript -> Parser(content) |