Skip to content

Commit

Permalink
Add comments to Reddit parser
Browse files: browse the repository at this point in the history
  • Loading branch information
alexferrari88 committed Jul 23, 2023
1 parent 299f1d6 commit b24b9ee
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
7 changes: 7 additions & 0 deletions src/extractors/custom/www.reddit.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,11 @@ export const WwwRedditComExtractor = {
'div a[data-test-id="comments-page-link-num-comments"]',
],
},

// Extra, extractor-specific fields beyond the standard ones; this commit adds
// a `comments` field to the Reddit extractor.
extend: {
comments: {
// Match every element rendered with data-testid="comment".
selectors: ['div[data-testid="comment"]'],
// NOTE(review): presumably makes the parser return all matches as a list
// instead of only the first one (the accompanying test reads
// comments.length and comments[0]) — confirm against the parser's
// handling of `extend` fields.
allowMultiple: true,
},
},
};
24 changes: 22 additions & 2 deletions src/extractors/custom/www.reddit.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';
import URL from 'url';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
import Mercury from 'mercury';
import { excerptContent } from 'utils/text';

const fs = require('fs');
Expand Down Expand Up @@ -224,5 +224,25 @@ describe('WwwRedditComExtractor', () => {

assert.equal(embed.length, 1);
});

// Checks that parsing the saved Reddit thread exposes a `comments` field:
// first its size, then the opening words of its first entry.
it('returns the comments', async () => {
  const uri =
    'https://www.reddit.com/r/AskReddit/comments/axtih6/what_is_the_most_worth_it_item_you_have_ever/';
  const html = fs.readFileSync('./fixtures/www.reddit.com--title-only.html');

  // Parse from the local fixture by handing the HTML over directly.
  const parsed = await Mercury.parse(uri, { html });
  const { comments } = parsed;

  // The fixture is expected to yield exactly 12 comments.
  assert.equal(comments.length, 12);

  // Only the leading excerpt of the first comment is compared (the expected
  // string below is 13 words long); fall back to '' if there is no entry.
  const leadingExcerpt = excerptContent(comments[0] || '', 13);
  assert.equal(
    leadingExcerpt,
    'A Miele canister vacuum. I had read the 4 AMAs from the vacuum'
  );
});
});
});

0 comments on commit b24b9ee

Please sign in to comment.