Skip to content

Commit

Permalink
Bring back Mercury (#569)
Browse files Browse the repository at this point in the history
* Get images working again

* Re-add custom extractors
  • Loading branch information
jocmp authored Dec 2, 2024
1 parent 5f08dbc commit 97a99f0
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 25 deletions.
2 changes: 2 additions & 0 deletions app/mercury.web.js

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import com.jocmp.capy.ArticleStatus
import com.jocmp.capy.Feed
import com.jocmp.capy.Folder
import com.jocmp.capy.MarkRead
import com.jocmp.capy.articles.parseHtml
import com.jocmp.capy.buildArticlePager
import com.jocmp.capy.common.UnauthorizedError
import com.jocmp.capy.common.launchIO
Expand Down Expand Up @@ -408,7 +407,7 @@ class ArticleScreenViewModel(
onSuccess = { value ->
if (_article?.id == article.id) {
_article = article.copy(
content = parseHtml(article, value),
content = value,
fullContent = Article.FullContentState.LOADED
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class WebViewState(

withContext(Dispatchers.Main) {
webView.loadDataWithBaseURL(
ASSET_BASE_URL,
article.url?.toString(),
html,
null,
"UTF-8",
Expand Down Expand Up @@ -211,7 +211,6 @@ fun rememberWebViewState(
settings.apply {
javaScriptEnabled = true
mediaPlaybackRequiresUserGesture = false
offscreenPreRaster = true
}
isVerticalScrollBarEnabled = false
isHorizontalScrollBarEnabled = false
Expand Down
2 changes: 1 addition & 1 deletion article_forge/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ SHELL:=/usr/bin/env bash

.PHONY: forge clean

build: $(TEMPLATE_DEST) $(STYLE_DEST) $(MEDIA_LISTENERS_JS_DEST)
build: $(TEMPLATE_DEST) $(STYLE_DEST) $(MEDIA_LISTENERS_JS_DEST) $(CUSTOM_EXTRACTOR_JS_DEST)

.PHONY: forge
forge:
Expand Down
51 changes: 51 additions & 0 deletions article_forge/public/assets/custom-extractors.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions article_forge/script/generate-android-template
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def main
file_content = File.read("./views/#{filename}.liquid")
file_content = file_content.gsub("{{body}}", "")
file_content = file_content.gsub(" {{debug_script}}\n", "")
file_content = file_content.gsub("/assets/", 'https://appassets.androidplatform.net/assets/')

assert_clean(file_content)

Expand Down
2 changes: 2 additions & 0 deletions article_forge/views/template.liquid
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
{{font_preload}}
<link rel="stylesheet" href="/assets/stylesheet.css">
<script type="text/javascript" src="/assets/media.js"></script>
<script type="text/javascript" src="/assets/mercury-parser.js"></script>
<script type="text/javascript" src="/assets/custom-extractors.js"></script>
</head>
<body>
<article>
Expand Down
51 changes: 51 additions & 0 deletions capy/src/main/assets/custom-extractors.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions capy/src/main/assets/mercury-parser.js

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions capy/src/main/java/com/jocmp/capy/Article.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ data class Article(
) {
val defaultContent = contentHTML.ifBlank { summary }

val parseFullContent = fullContent == FullContentState.LOADED && extractedContentURL == null

enum class FullContentState {
NONE,
LOADING,
Expand Down
15 changes: 9 additions & 6 deletions capy/src/main/java/com/jocmp/capy/articles/ArticleRenderer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,17 @@ class ArticleRenderer(
article.siteURL?.let { setBaseUri(it) }
}

document.getElementById("article-body-content")?.append(article.content)
if (article.parseFullContent) {
val contentHTML = Jsoup.parse(article.content)

cleanStyles(document)
cleanLinks(document)
if (hideImages) {
removeImages(document)
HtmlPostProcessor.clean(contentHTML, hideImages = hideImages)

document.getElementById("article-body-content")?.append(parseHtml(article, contentHTML.html()))
} else {
document.getElementById("article-body-content")?.append(article.content)

HtmlPostProcessor.clean(document, hideImages = hideImages)
}
wrapTables(document)

return document.html()
}
Expand Down
14 changes: 14 additions & 0 deletions capy/src/main/java/com/jocmp/capy/articles/HtmlPostProcessor.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.jocmp.capy.articles

import org.jsoup.nodes.Document

/**
 * Groups the HTML cleanup passes that are applied to a parsed jsoup [Document].
 */
object HtmlPostProcessor {
    /**
     * Runs the cleanup passes over [document] in a fixed order: `cleanStyles`, `cleanLinks`,
     * `removeImages` (only when [hideImages] is `true`), and finally `wrapTables`.
     *
     * Nothing is returned and the helpers' results are not used here, so any effect is
     * whatever those helpers do to [document] itself.
     *
     * @param document the parsed HTML document handed to every pass
     * @param hideImages when `true`, the `removeImages` pass is included
     */
    fun clean(document: Document, hideImages: Boolean) {
        cleanStyles(document)
        cleanLinks(document)

        // Image removal is the only optional pass.
        if (hideImages) removeImages(document)

        wrapTables(document)
    }
}
37 changes: 25 additions & 12 deletions capy/src/main/java/com/jocmp/capy/articles/ParseHTML.kt
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
package com.jocmp.capy.articles

import com.jocmp.capy.Article
import net.dankito.readability4j.Readability4J
import org.json.JSONObject

/**
 * Returns, for the given [article] and downloaded page [html], a string used as article content.
 *
 * NOTE(review): this span is taken from a diff rendering (hunk `-1,20 +1,33`) that has no +/- markers
 * and no indentation. Lines that appear to belong to the removed Readability4J implementation
 * (`try`/`catch`, `readability4J.parse()`, the `readability-styled` loop, `content.html()`) are
 * interleaved with the lines of the added Mercury implementation. Confirm against the repository
 * before editing or compiling this text.
 *
 * What the Mercury-related lines show: the function returns an inline script block (passed through
 * `trimIndent()`) that
 * - embeds [html] as the `value` field of a `JSONObject` and reads it back as `downloaded.value`,
 * - calls `Mercury.parse` with the article URL and `{ html: downloaded.value }`,
 * - creates a `div` with id `article-body-content` and sets its `innerHTML` to the parsed content,
 * - prepends an `img` for `lead_image_url` when that value is set and no existing `img` has a `src`
 *   that includes it,
 * - replaces the existing `article-body-content` element with the new `div`.
 *
 * NOTE(review): `article.url` is interpolated directly into a JavaScript string literal, whereas
 * [html] goes through `JSONObject`. A null URL would render as the text `null`, and a URL containing
 * a double quote would break the literal. Verify whether that is acceptable.
 */
fun parseHtml(article: Article, html: String): String {
try {
val uri = (article.feedURL ?: article.url).toString()
val readability4J = Readability4J(uri, html)
val content = readability4J.parse().articleContent ?: return ""
return """
<script>
(async () => {
let downloaded = ${JSONObject(mapOf("value" to html))};
content.getElementsByClass("readability-styled").forEach { element ->
element.append("&nbsp;")
}
Mercury.parse("${article.url?.toString()}", { html: downloaded.value }).then(article => {
let extracted = document.createElement("div");
return content.html()
} catch (ex: Throwable) {
return ""
}
extracted.id = "article-body-content"
extracted.innerHTML = article.content;
let shouldAddImage = article.lead_image_url &&
![...extracted.querySelectorAll("img")].some(img => img.src.includes(article.lead_image_url));
if (shouldAddImage) {
let leadImage = document.createElement("img");
leadImage.src = article.lead_image_url;
extracted.prepend(leadImage);
}
let content = document.getElementById("article-body-content");
content.replaceWith(extracted);
});
})();
</script>
""".trimIndent()
}
6 changes: 4 additions & 2 deletions capy/src/main/res/raw/template.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
13 changes: 13 additions & 0 deletions technotes/Scratch/2024-11 Mercury Parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
```kotlin
webView.evaluateJavascript(
"""(function test() {
return "hello";
})();
""".trimIndent()) {
it
}
```

1. Load placeholder text
2. Fetch full content
3. evaluateJavascript -> Parser(content)

0 comments on commit 97a99f0

Please sign in to comment.