Skip to content

Commit

Permalink
[Bandcamp] Support loading additional comments (#1030)
Browse files Browse the repository at this point in the history
  • Loading branch information
petlyh authored Mar 4, 2023
1 parent 6bdd698 commit 5a9b6ed
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -1,23 +1,36 @@
package org.schabi.newpipe.extractor.services.bandcamp.extractors;

import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import javax.annotation.Nonnull;

public class BandcampCommentsExtractor extends CommentsExtractor {

private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";

private Document document;


Expand All @@ -39,19 +52,81 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()

final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());

final Elements writings = document.getElementsByClass("writing");
final JsonObject collectorsData = JsonUtils.toJsonObject(
document.getElementById("collectors-data").attr("data-blob"));
final JsonArray reviews = collectorsData.getArray("reviews");

for (final Element writing : writings) {
collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl()));
for (final Object review : reviews) {
collector.commit(
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
}

return new InfoItemsPage<>(collector, null);
if (!collectorsData.getBoolean("more_reviews_available")) {
return new InfoItemsPage<>(collector, null);
}

final String trackId = getTrackId();
final String token = getNextPageToken(reviews);
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
}

/**
 * Loads a follow-up page of comments.
 *
 * <p>The given {@code page} is expected to carry two ids, in this order: the track id and the
 * token used to request the next batch of reviews (this is the shape built with
 * {@code new Page(List.of(trackId, token))} at the end of this method).</p>
 *
 * @param page the page descriptor whose ids are read as {@code [trackId, token]}
 * @return the collected comments together with the next page, or with {@code null} as next page
 *         when the response does not report {@code more_available}
 * @throws IOException         declared by the overridden method
 * @throws ExtractionException if the reviews could not be fetched or no next token was found
 */
@Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws IOException, ExtractionException {
// NOTE(review): this statement looks like the pre-change body left over from the diff view;
// taken literally it makes everything below unreachable - confirm which version is intended.
return null;

final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());

// The ids are positional: index 0 is the track id, index 1 is the continuation token.
final List<String> pageIds = page.getIds();
final String trackId = pageIds.get(0);
final String token = pageIds.get(1);
final JsonObject reviewsData = fetchReviewsData(trackId, token);
final JsonArray reviews = reviewsData.getArray("results");

// Every entry of "results" is cast to JsonObject and wrapped in an item extractor.
for (final Object review : reviews) {
collector.commit(
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
}

// No further reviews reported: return this batch without a next page.
if (!reviewsData.getBoolean("more_available")) {
return new InfoItemsPage<>(collector, null);
}

// Otherwise the next page reuses the track id with the token taken from this batch.
return new InfoItemsPage<>(collector,
new Page(List.of(trackId, getNextPageToken(reviews))));
}

/**
 * Posts a reviews request to {@code REVIEWS_API_URL} and parses the response body as JSON.
 *
 * <p>The request body is a JSON object with the keys {@code tralbum_type} (always {@code "t"}),
 * {@code tralbum_id}, {@code token}, {@code count} (always {@code 7}) and an empty
 * {@code exclude_fan_ids} array.</p>
 *
 * @param trackId value sent as {@code tralbum_id}
 * @param token   value sent as {@code token}
 * @return the response body parsed into a {@link JsonObject}
 * @throws ParsingException if the request fails with an {@link IOException} or a
 *                          {@link ReCaptchaException}, or if the response cannot be parsed
 */
private JsonObject fetchReviewsData(final String trackId, final String token)
        throws ParsingException {
    // Build the JSON payload first so the network call below stays easy to read.
    final String requestJson = JsonWriter.string()
            .object()
            .value("tralbum_type", "t")
            .value("tralbum_id", trackId)
            .value("token", token)
            .value("count", 7)
            .array("exclude_fan_ids").end()
            .end()
            .done();
    final byte[] requestBody = requestJson.getBytes(StandardCharsets.UTF_8);

    try {
        final String responseBody = getDownloader()
                .postWithContentTypeJson(REVIEWS_API_URL, Collections.emptyMap(), requestBody)
                .responseBody();
        return JsonUtils.toJsonObject(responseBody);
    } catch (final IOException | ReCaptchaException e) {
        // Network and captcha failures are reported to callers as a parsing failure.
        throw new ParsingException("Could not fetch reviews", e);
    }
}

/**
 * Returns the {@code "token"} value of the last JSON object in {@code reviews}; it is used as
 * the continuation token for the next page request.
 *
 * <p>Entries that are not {@link JsonObject}s are skipped. A last review without a token is
 * reported as a {@link ParsingException}; the earlier stream-based version would have thrown
 * a {@link NullPointerException} from {@code Stream.reduce} in that case.</p>
 *
 * @param reviews the reviews of the page that was just loaded
 * @return the token of the last review object
 * @throws ParsingException if {@code reviews} contains no JSON object or the last one has no
 *                          {@code "token"} value
 */
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
    // Only the last object matters, so scan from the end instead of visiting every entry.
    for (int i = reviews.size() - 1; i >= 0; i--) {
        final Object review = reviews.get(i);
        if (review instanceof JsonObject) {
            final String token = ((JsonObject) review).getString("token");
            if (token == null) {
                throw new ParsingException("Could not get token");
            }
            return token;
        }
    }
    throw new ParsingException("Could not get token");
}

/**
 * Reads the {@code item_id} property from the JSON stored in the {@code content} attribute of
 * the page's {@code bc-page-properties} meta tag.
 *
 * @return the {@code item_id} value rendered as a decimal string
 * @throws ParsingException if the meta tag is missing from the document or its content is
 *                          rejected by {@link JsonUtils#toJsonObject}
 */
private String getTrackId() throws ParsingException {
    // selectFirst returns null when nothing matches; fail with the declared exception type
    // instead of letting a NullPointerException escape.
    final var propertiesMeta = document.selectFirst("meta[name=bc-page-properties]");
    if (propertiesMeta == null) {
        throw new ParsingException("Could not get page properties");
    }

    final JsonObject pageProperties = JsonUtils.toJsonObject(propertiesMeta.attr("content"));
    return Long.toString(pageProperties.getLong("item_id"));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
package org.schabi.newpipe.extractor.services.bandcamp.extractors;

import org.jsoup.nodes.Element;
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;

import com.grack.nanojson.JsonObject;

import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;

import java.util.Objects;

public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {

private final Element writing;
private final JsonObject review;
private final String url;

public BandcampCommentsInfoItemExtractor(final Element writing, final String url) {
this.writing = writing;
public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
this.review = review;
this.url = url;
}

Expand All @@ -29,31 +30,21 @@ public String getUrl() {

@Override
public String getThumbnailUrl() throws ParsingException {
return writing.getElementsByClass("thumb").attr("src");
return getUploaderAvatarUrl();
}

@Override
public Description getCommentText() throws ParsingException {
final var text = writing.getElementsByClass("text").stream()
.filter(Objects::nonNull)
.map(Element::ownText)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text"));

return new Description(text, Description.PLAIN_TEXT);
return new Description(review.getString("why"), Description.PLAIN_TEXT);
}

@Override
public String getUploaderName() throws ParsingException {
return writing.getElementsByClass("name").stream()
.filter(Objects::nonNull)
.map(Element::text)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get uploader name"));
return review.getString("name");
}

@Override
public String getUploaderAvatarUrl() {
return writing.getElementsByClass("thumb").attr("src");
return getImageUrl(review.getLong("image_id"), false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public void hasComments() throws IOException, ExtractionException {
@Test
public void testGetCommentsAllData() throws IOException, ExtractionException {
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
assertTrue(comments.hasNextPage());

DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
for (CommentsInfoItem c : comments.getItems()) {
Expand Down

0 comments on commit 5a9b6ed

Please sign in to comment.