Skip to content

Commit

Permalink
Improve garbage collection
Browse files Browse the repository at this point in the history
  • Loading branch information
Flameish committed Sep 7, 2020
1 parent 9ff71b3 commit 3fadfe2
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 18 deletions.
38 changes: 21 additions & 17 deletions src/main/java/grabber/Chapter.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
*/
public class Chapter implements Serializable {
private static int chapterId = 0; // Used to set unique filenames
public Element chapterContent;
public Element chapterContainer;
public String chapterContent;
public Document doc;
public String name;
public String chapterURL;
Expand All @@ -48,7 +49,7 @@ public void saveChapter(Novel novel) {
ChapterContentScripts.fetchContent(novel, this);

// Check for empty content
if (chapterContent == null) {
if (chapterContainer == null) {
if(init.gui != null) {
init.gui.appendText(novel.window,
"[GRABBER]Chapter container (" + novel.chapterContainer + ") not found.");
Expand All @@ -64,23 +65,22 @@ public void saveChapter(Novel novel) {
novel.nextChapterURL = doc.select(novel.nextChapterBtn).first().absUrl("href");
}
}

removeUnwantedTags(novel);

if (novel.getImages) {
getImages(novel);
// Remove <img> tags.
// Images would be loaded from the host via their original src links on the eReader if left in.
} else {
chapterContent.select("img").remove();
chapterContainer.select("img").remove();
}

if (novel.displayChapterTitle) {
chapterContent.prepend(
chapterContainer.prepend(
"<span style=\"font-weight: 700; text-decoration: underline;\">" + name + "</span><br>" + EPUB.NL);
}
chapterContent.prepend(EPUB.htmlHead);
chapterContent.append( EPUB.NL+EPUB.htmlFoot);
chapterContainer.prepend(EPUB.htmlHead);
chapterContainer.append( EPUB.NL+EPUB.htmlFoot);

cleanHtml();

Expand All @@ -91,6 +91,10 @@ public void saveChapter(Novel novel) {
}
System.out.println("[GRABBER]Saved chapter: "+ name);
status = 1;
// Improve GC: keep only the serialized HTML string and release the parsed Document/Element references
chapterContent = chapterContainer.toString();
doc = null;
chapterContainer = null;
}

/**
Expand All @@ -99,21 +103,21 @@ public void saveChapter(Novel novel) {
*/
private void removeUnwantedTags(Novel novel) {
// Always remove <script>
chapterContent.select("script").remove();
chapterContent.select("style").remove();
chapterContainer.select("script").remove();
chapterContainer.select("style").remove();
// Try to remove navigation links
String[] blacklistedWords = new String[] {"next","previous","table","index","back","chapter","home"};
for(Element link: chapterContent.select("a[href]")) {
for(Element link: chapterContainer.select("a[href]")) {
if(Arrays.stream(blacklistedWords).anyMatch(link.text().toLowerCase()::contains)) link.remove();
}
if (novel.removeStyling) {
chapterContent.select("[style]").removeAttr("style");
chapterContainer.select("[style]").removeAttr("style");
}

if (novel.blacklistedTags != null && !novel.blacklistedTags.isEmpty()) {
for (String tag : novel.blacklistedTags) {
if (!chapterContent.select(tag).isEmpty()) {
chapterContent.select(tag).remove();
if (!chapterContainer.select(tag).isEmpty()) {
chapterContainer.select(tag).remove();
}
}
}
Expand All @@ -125,7 +129,7 @@ private void removeUnwantedTags(Novel novel) {
* @param novel
*/
private void updatePageCount(Novel novel) {
novel.wordCount = novel.wordCount + GrabberUtils.getWordCount(chapterContent.toString());
novel.wordCount = novel.wordCount + GrabberUtils.getWordCount(chapterContainer.toString());
if(init.gui != null && !novel.window.equals("checker")) {
init.gui.pagesCountLbl.setText(String.valueOf(novel.wordCount / 300));
}
Expand All @@ -136,7 +140,7 @@ private void updatePageCount(Novel novel) {
* @param novel
*/
private void getImages(Novel novel) {
for (Element image : chapterContent.select("img")) {
for (Element image : chapterContainer.select("img")) {
try {
String imageURL = image.absUrl("src");
String imageFilename = GrabberUtils.getFilenameFromUrl(imageURL);
Expand Down Expand Up @@ -174,11 +178,11 @@ private void cleanHtml() {
outputSettings.syntax(Document.OutputSettings.Syntax.xml);
outputSettings.escapeMode(Entities.EscapeMode.xhtml);

String chapter = chapterContent.toString().replaceAll("<br>", "\n");
String chapter = chapterContainer.toString().replaceAll("<br>", "\n");
TagNode tagNode = cleaner.clean(chapter);

String html = "<" + tagNode.getName() + ">" + cleaner.getInnerHtml(tagNode) + "</" + tagNode.getName() + ">";
chapterContent = Jsoup.parse(html).outputSettings(outputSettings);
chapterContainer = Jsoup.parse(html).outputSettings(outputSettings);

}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/grabber/EPUB.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ private void addChapters() throws IOException {
for(Chapter chapter: novel.chapterList) {
if(chapter.status == 1) {
inputStream = new ByteArrayInputStream(
chapter.chapterContent.toString()
chapter.chapterContent
.getBytes(StandardCharsets.UTF_8)
);
Resource resource = new Resource(inputStream, chapter.fileName + ".html");
Expand Down

0 comments on commit 3fadfe2

Please sign in to comment.