From 83e0c8034eadda5907f9bbf807909e8abfaa4722 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 17:33:58 -0500 Subject: [PATCH 01/22] Extract countdown text and time from app page --- services/steam-scraper.js | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 313fe72..6fae867 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -11,6 +11,7 @@ const TITLE_REMOVE = [ const PERCENT_REGEX = /(\d+%)/; const REVIEWS_COUNT_REGEX = /([\d,]+) user review/; +const DISCOUNT_COUNTDOWN_REGEX = /DiscountCountdown,[ ]*([\d]{7,})/; async function getAppPageData(appUrl) { let appPageHtml = await _rp({ url: appUrl }); @@ -63,6 +64,13 @@ function extractReviewsCount(input) { } } +function extractDiscountCountdown(input) { + let match = DISCOUNT_COUNTDOWN_REGEX.exec(input); + if (match) { + return match[1]; + } +} + function stripQueryString(url) { return url.split(/[?#]/)[0]; } @@ -143,6 +151,8 @@ function getGameDataFromGameElement(gameElement) { let $ = _cheerio.load(gameElement); let title = ""; + let countdownText = ""; + let countdownTime = ""; let price = ""; let discounted = false; let originalPrice = ""; @@ -155,6 +165,17 @@ function getGameDataFromGameElement(gameElement) { } } + try { + countdownText = $('.game_purchase_discount_countdown').text().trim(); + } catch { }; + + + try { + let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; + let countdownTimeText = extractDiscountCountdown(countdownScript); + countdownTime = parseInt(countdownTimeText); + } catch { }; + originalPrice = $('.discount_original_price').text().trim(); percentOff = extractPercent($('.discount_pct').text().trim()); @@ -166,6 +187,8 @@ function getGameDataFromGameElement(gameElement) { return { title, + countdownText, + countdownTime, originalPrice, discounted, price, From 4c1914aaec93f90dd584a6b0846d70feb2afd2c5 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 17:49:31 -0500 Subject: [PATCH 02/22] Refactor --- app.js | 16 +++++----- services/steam-scraper.js | 64 +++++++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/app.js b/app.js index f03314f..e8bafd9 100644 --- a/app.js +++ b/app.js @@ -12,14 +12,8 @@ function main() { _app.post('/api/app-scrape', async (req, res) => { let appUrl = req.body.url; - let gamePageData = await _steamScraper.getAppPageData(appUrl); - res.json(gamePageData); - }); - - _app.post('/api/headset-scrape', async (req, res) => { - let searchUrl = req.body.url; - let searchPageData = await _steamScraper.getHeadsetsFromAppPage(searchUrl); - res.json(searchPageData); + let appPageData = await _steamScraper.getAppPageData(appUrl); + res.json(appPageData); }); _app.post('/api/search-scrape', async (req, res) => { @@ -28,6 +22,12 @@ function main() { res.json(searchPageData); }); + _app.post('/api/search-app-scrape', async (req, res) => { + let appUrl = req.body.url; + let appPageData = await _steamScraper.getSearchAppPageData(appUrl); + res.json(appPageData); + }); + _app.listen(PORT, () => { console.log(`App listening on port ${PORT}!`); }); diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 6fae867..293d576 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -88,10 +88,49 @@ function getHeadsets($) { return headsets; } -async function getHeadsetsFromAppPage(link) { - let pageHtml = await _rp({ url: link }); - let $ = _cheerio.load(pageHtml); - return getHeadsets($); +async function getSearchAppPageData(appUrl) { + let appPageHtml = await _rp({ url: appUrl }); + let $ = _cheerio.load(appPageHtml); + + let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); + if (gameElements.length < 1) { + return { + error: true, + message: "Could not find any game elements." + }; + } + + let firstGame = gameElements[0]; + let countdown = getCountdown(firstGame); + let headsets = getHeadsets($); + + return { + countdown, + headsets + }; +} + +function getCountdown(gameElement) { + let $ = _cheerio.load(gameElement); + + let text = ""; + let time = 0; + + try { + text = $('.game_purchase_discount_countdown').text().trim(); + } catch { }; + + + try { + let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; + let countdownTimeText = extractDiscountCountdown(countdownScript); + time = parseInt(countdownTimeText); + } catch { }; + + return { + text, + time + } } async function getGameDataFromSearchResult(searchResult) { @@ -151,8 +190,6 @@ function getGameDataFromGameElement(gameElement) { let $ = _cheerio.load(gameElement); let title = ""; - let countdownText = ""; - let countdownTime = ""; let price = ""; let discounted = false; let originalPrice = ""; @@ -165,17 +202,6 @@ function getGameDataFromGameElement(gameElement) { } } - try { - countdownText = $('.game_purchase_discount_countdown').text().trim(); - } catch { }; - - - try { - let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; - let countdownTimeText = extractDiscountCountdown(countdownScript); - countdownTime = parseInt(countdownTimeText); - } catch { }; - originalPrice = $('.discount_original_price').text().trim(); percentOff = extractPercent($('.discount_pct').text().trim()); @@ -187,8 +213,6 @@ function getGameDataFromGameElement(gameElement) { return { title, - countdownText, - countdownTime, originalPrice, discounted, price, @@ -198,6 +222,6 @@ function getGameDataFromGameElement(gameElement) { module.exports = { getAppPageData, - getHeadsetsFromAppPage, + getSearchAppPageData, getSearchPageData }; \ No newline at end of file From d227bdbc664899755df42df3f6faa30d9afa84f9 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 17:53:45 -0500 Subject: [PATCH 03/22] Fix UI for headsets --- public/scripts/index.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index b3cf54c..e479c98 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -131,7 +131,9 @@ async function retrieveSteamSearchTable() { let content = { url: app.link }; - app.headsets = await post('./api/headset-scrape', content); + + let appData = await post('./api/search-app-scrape', content); + app.headsets = appData.headsets; } } From 2e760d64499ec7e7ca92c3a5b14b1d0ace8afeaa Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 18:04:03 -0500 Subject: [PATCH 04/22] Add countdown data to UI --- public/scripts/index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scripts/index.js b/public/scripts/index.js index e479c98..bfeb5f5 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -134,6 +134,7 @@ async function retrieveSteamSearchTable() { let appData = await post('./api/search-app-scrape', content); app.headsets = appData.headsets; + app.countdown = appData.countdown; } } From d0befda959a1e31a7d2ec3388286c509396a7793 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 18:20:03 -0500 Subject: [PATCH 05/22] Move formatting to new method --- public/scripts/index.js | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index bfeb5f5..3b1dcb6 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -156,6 +156,29 @@ async function retrieveSteamSearchTable() { retrieveSearchButton.disabled = false; } +function formatAppData(app) { + let platform = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); + let title = escapePipes(app.title); + let link = app.link; + let price = extractNumberFromPrice(app.price) || app.price || ""; + let percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff || ""; + let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; + let reviewsCount = app.reviewsCount || ""; + + let bundlePrefix = app.type == "BUNDLE" ? "**Bundle** - " : ""; + title = `${bundlePrefix}[${title}](${link})`; + + return { + platform, + title, + link, + price, + percentOff, + reviews, + reviewsCount + } +} + async function retrieveSearchPageData(steamSearchUrl, pageNumber) { let content = { url: `${steamSearchUrl}` @@ -172,17 +195,8 @@ function createMarkdownTable(searchData) { let result = header + NEW_LINE + divider + NEW_LINE; for (let app of searchData) { - let platform = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); - let title = escapePipes(app.title); - let link = app.link; - let price = extractNumberFromPrice(app.price) || app.price || ""; - let percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff || ""; - let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; - let reviewsCount = app.reviewsCount || ""; - - let bundlePrefix = app.type == "BUNDLE" ? "**Bundle** - " : ""; - - result += `| ${platform} | ${bundlePrefix}[${title}](${link}) | ${price} | ${percentOff} | ${reviews} | ${reviewsCount} |` + NEW_LINE; + let formatted = formatAppData(app); + result += `| ${formatted.platform} | ${formatted.title} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} |` + NEW_LINE; } return result; From b6bba594c5eac6178850aef1dc12f2b4912acc24 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 18:34:27 -0500 Subject: [PATCH 06/22] Add extra data --- public/scripts/index.js | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index 3b1dcb6..3d11b93 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -29,6 +29,10 @@ const headsetAliases = { } } +let cache = { + searchData: [] +} + async function retrieveSteamAppTitle() { let retrievePageButton = document.getElementById('retrieve-steam-app-title'); let pageResultsDiv = document.getElementById('page-results'); @@ -157,22 +161,32 @@ async function retrieveSteamSearchTable() { } function formatAppData(app) { - let platform = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); - let title = escapePipes(app.title); + let type = app.type; + let platform = app.headsets.join(', '); + let platformAbbreviated = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); + let title = app.title; + let titleLink = escapePipes(app.title); let link = app.link; let price = extractNumberFromPrice(app.price) || app.price || ""; + let originalPrice = extractNumberFromPrice(app.originalPrice) || app.price || ""; + let discounted = app.discounted; let percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff || ""; let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; let reviewsCount = app.reviewsCount || ""; let bundlePrefix = app.type == "BUNDLE" ? "**Bundle** - " : ""; - title = `${bundlePrefix}[${title}](${link})`; + titleLink = `${bundlePrefix}[${titleLink}](${link})`; return { + type, platform, + platformAbbreviated, title, + titleLink, link, price, + originalPrice, + discounted, percentOff, reviews, reviewsCount @@ -194,11 +208,16 @@ function createMarkdownTable(searchData) { let divider = '| :- | :- | -: | -: | -: | -: |'; let result = header + NEW_LINE + divider + NEW_LINE; + let formattedData = []; + for (let app of searchData) { let formatted = formatAppData(app); - result += `| ${formatted.platform} | ${formatted.title} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} |` + NEW_LINE; + result += `| ${formatted.platformAbbreviated} | ${formatted.titleLink} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} |` + NEW_LINE; + formattedData.push(formatted); } + cache.searchData = formattedData; + return result; } From 218f4a243421880752768201fc9d1b1d4a96ae08 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 18:37:23 -0500 Subject: [PATCH 07/22] Add countdown text and time --- public/scripts/index.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/scripts/index.js b/public/scripts/index.js index 3d11b93..75cb963 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -171,6 +171,8 @@ function formatAppData(app) { let originalPrice = extractNumberFromPrice(app.originalPrice) || app.price || ""; let discounted = app.discounted; let percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff || ""; + let countdownText = app.countdown.text; + let countdownTime = app.countdown.time; let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; let reviewsCount = app.reviewsCount || ""; @@ -188,6 +190,8 @@ function formatAppData(app) { originalPrice, discounted, percentOff, + countdownText, + countdownTime, reviews, reviewsCount } From e4762add04541aecacd943a9ea6379781a585498 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Wed, 13 Nov 2019 18:40:20 -0500 Subject: [PATCH 08/22] Fix constants --- public/scripts/index.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index 75cb963..6bd08ee 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -6,7 +6,9 @@ const PERCENT_NUMBER_REGEX = /(\d+)%/; const NEW_LINE = ' '; const MAX_PAGES = 100; -const headsetAliases = { +const BUNDLE_PREFIX = "**Bundle** - "; + +const HEADSET_ALIASES = { 'Valve Index': { shortName: 'Index', abbreviation: 'I' @@ -176,8 +178,8 @@ function formatAppData(app) { let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; let reviewsCount = app.reviewsCount || ""; - let bundlePrefix = app.type == "BUNDLE" ? "**Bundle** - " : ""; - titleLink = `${bundlePrefix}[${titleLink}](${link})`; + let titlePrefix = app.type == "BUNDLE" ? BUNDLE_PREFIX : ""; + titleLink = `${titlePrefix}[${titleLink}](${link})`; return { type, @@ -265,7 +267,7 @@ function getPlatformText(platforms) { } function getHeadsetshortName(headsetName) { - let headsetAlias = headsetAliases[headsetName]; + let headsetAlias = HEADSET_ALIASES[headsetName]; if (headsetAlias) { return headsetAlias.shortName; } else { @@ -274,7 +276,7 @@ function getHeadsetshortName(headsetName) { } function getHeadsetAbbreviation(headsetName) { - let headsetAlias = headsetAliases[headsetName]; + let headsetAlias = HEADSET_ALIASES[headsetName]; if (headsetAlias) { return headsetAlias.abbreviation; } else { From 5f66c8585a824e1caf1cec6aa96c87ebaa56fe52 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:08:35 -0500 Subject: [PATCH 09/22] Create regex utils --- services/steam-scraper.js | 34 ++++++---------------------------- utils/regex-utils.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 28 deletions(-) create mode 100644 utils/regex-utils.js diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 293d576..28d7756 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -1,5 +1,6 @@ const _cheerio = require('cheerio'); const _rp = require('request-promise'); +const _regexUtils = require('../utils/regex-utils'); const TITLE_REMOVE = [ 'Buy', @@ -9,10 +10,6 @@ const TITLE_REMOVE = [ 'Pre-Purchase' ]; -const PERCENT_REGEX = /(\d+%)/; -const REVIEWS_COUNT_REGEX = /([\d,]+) user review/; -const DISCOUNT_COUNTDOWN_REGEX = /DiscountCountdown,[ ]*([\d]{7,})/; - async function getAppPageData(appUrl) { let appPageHtml = await _rp({ url: appUrl }); let $ = _cheerio.load(appPageHtml); @@ -50,26 +47,7 @@ async function getSearchPageData(searchUrl) { return searchPageData; } -function extractPercent(input) { - let match = PERCENT_REGEX.exec(input); - if (match) { - return match[1]; - } -} - -function extractReviewsCount(input) { - let match = REVIEWS_COUNT_REGEX.exec(input); - if (match) { - return match[1]; - } -} -function extractDiscountCountdown(input) { - let match = DISCOUNT_COUNTDOWN_REGEX.exec(input); - if (match) { - return match[1]; - } -} function stripQueryString(url) { return url.split(/[?#]/)[0]; @@ -123,7 +101,7 @@ function getCountdown(gameElement) { try { let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; - let countdownTimeText = extractDiscountCountdown(countdownScript); + let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); time = parseInt(countdownTimeText); } catch { }; @@ -157,7 +135,7 @@ async function getGameDataFromSearchResult(searchResult) { price = $('div.search_price').clone().children().remove().end().text().trim(); originalPrice = $('div.search_price > span > strike').text().trim(); - percentOff = extractPercent($('div.search_discount > span').text().trim()); + percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()); if (originalPrice && percentOff) { discounted = true; @@ -168,8 +146,8 @@ async function getGameDataFromSearchResult(searchResult) { if (reviewsSummary) { reviewsSummary = reviewsSummary.trim(); - reviewsPercent = extractPercent(reviewsSummary); - reviewsCount = extractReviewsCount(reviewsSummary).replace(/,/g, ''); + reviewsPercent = _regexUtils.extractPercent(reviewsSummary); + reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, ''); } } @@ -203,7 +181,7 @@ function getGameDataFromGameElement(gameElement) { } originalPrice = $('.discount_original_price').text().trim(); - percentOff = extractPercent($('.discount_pct').text().trim()); + percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()); if (originalPrice && percentOff) { discounted = true; diff --git a/utils/regex-utils.js b/utils/regex-utils.js new file mode 100644 index 0000000..5e7035f --- /dev/null +++ b/utils/regex-utils.js @@ -0,0 +1,30 @@ +const PERCENT_REGEX = /(\d+%)/; +const REVIEWS_COUNT_REGEX = /([\d,]+) user review/; +const DISCOUNT_COUNTDOWN_REGEX = /DiscountCountdown,[ ]*([\d]{7,})/; + +function extractPercent(input) { + let match = PERCENT_REGEX.exec(input); + if (match) { + return match[1]; + } +} + +function extractReviewsCount(input) { + let match = REVIEWS_COUNT_REGEX.exec(input); + if (match) { + return match[1]; + } +} + +function extractDiscountCountdown(input) { + let match = DISCOUNT_COUNTDOWN_REGEX.exec(input); + if (match) { + return match[1]; + } +} + +module.exports = { + extractPercent, + extractReviewsCount, + extractDiscountCountdown +} \ No newline at end of file From a0cacbda8801ec7e15f5994f8b5c930336e2c3b8 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:17:40 -0500 Subject: [PATCH 10/22] Consolidate getting first game --- public/scripts/index.js | 14 +++++++++----- services/steam-scraper.js | 18 ++++++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index 6bd08ee..ba1db22 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -139,8 +139,12 @@ async function retrieveSteamSearchTable() { }; let appData = await post('./api/search-app-scrape', content); - app.headsets = appData.headsets; - app.countdown = appData.countdown; + app.headsets = appData.headsets || []; + app.countdown = appData.countdown || { text: "", time: 0 }; + } else { + app.headsets = []; + app.countdown.text = ""; + app.countdown.time = 0; } } @@ -210,15 +214,15 @@ async function retrieveSearchPageData(steamSearchUrl, pageNumber) { } function createMarkdownTable(searchData) { - let header = '| Platform | Title | Price (USD) | Discount (%) | Rating (%) | Review Count |'; - let divider = '| :- | :- | -: | -: | -: | -: |'; + let header = '| Platform | Title | Price (USD) | Discount (%) | Rating (%) | Review Count | Text | Time |'; + let divider = '| :- | :- | -: | -: | -: | -: | | |'; let result = header + NEW_LINE + divider + NEW_LINE; let formattedData = []; for (let app of searchData) { let formatted = formatAppData(app); - result += `| ${formatted.platformAbbreviated} | ${formatted.titleLink} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} |` + NEW_LINE; + result += `| ${formatted.platformAbbreviated} | ${formatted.titleLink} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} | ${formatted.countdownText} | ${formatted.countdownTime}` + NEW_LINE; formattedData.push(formatted); } diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 28d7756..19e22b7 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -14,15 +14,14 @@ async function getAppPageData(appUrl) { let appPageHtml = await _rp({ url: appUrl }); let $ = _cheerio.load(appPageHtml); - let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); - if (gameElements.length < 1) { + let firstGame = getMainGameElement($); + if (!firstGame) { return { error: true, message: "Could not find any game elements." }; } - let firstGame = gameElements[0]; let gameData = getGameDataFromGameElement(firstGame); let headsets = getHeadsets($); @@ -66,19 +65,26 @@ function getHeadsets($) { return headsets; } +function getMainGameElement($) { + let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); + if (gameElements.length < 1) { + return; + } + return gameElements[0]; +} + async function getSearchAppPageData(appUrl) { let appPageHtml = await _rp({ url: appUrl }); let $ = _cheerio.load(appPageHtml); - let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); - if (gameElements.length < 1) { + let firstGame = getMainGameElement($); + if (!firstGame) { return { error: true, message: "Could not find any game elements." }; } - let firstGame = gameElements[0]; let countdown = getCountdown(firstGame); let headsets = getHeadsets($); From d382aeb90c50545a8145c96d1dc37ef0b46bec60 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:21:45 -0500 Subject: [PATCH 11/22] Add string-utils, reorder methods --- services/steam-scraper.js | 76 +++++++++++++++++++-------------------- utils/string-utils.js | 7 ++++ 2 files changed, 43 insertions(+), 40 deletions(-) create mode 100644 utils/string-utils.js diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 19e22b7..9786d0f 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -1,6 +1,7 @@ const _cheerio = require('cheerio'); const _rp = require('request-promise'); const _regexUtils = require('../utils/regex-utils'); +const _stringUtils = require('../utils/string-utils'); const TITLE_REMOVE = [ 'Buy', @@ -10,7 +11,22 @@ const TITLE_REMOVE = [ 'Pre-Purchase' ]; -async function getAppPageData(appUrl) { +async function getSearchPageData(searchUrl) { + let searchPageHtml = await _rp({ url: searchUrl }); + let $ = _cheerio.load(searchPageHtml); + + let searchResults = Array.from($('#search_resultsRows > a.search_result_row')); + + let searchPageData = []; + for (var searchResult of searchResults) { + let gameData = await getGameDataFromSearchResult(searchResult); + searchPageData.push(gameData); + } + return searchPageData; +} + + +async function getSearchAppPageData(appUrl) { let appPageHtml = await _rp({ url: appUrl }); let $ = _cheerio.load(appPageHtml); @@ -22,34 +38,35 @@ async function getAppPageData(appUrl) { }; } - let gameData = getGameDataFromGameElement(firstGame); + let countdown = getCountdown(firstGame); let headsets = getHeadsets($); return { - link: appUrl, - ...gameData, + countdown, headsets }; } -async function getSearchPageData(searchUrl) { - let searchPageHtml = await _rp({ url: searchUrl }); - let $ = _cheerio.load(searchPageHtml); - - let searchResults = Array.from($('#search_resultsRows > a.search_result_row')); +async function getAppPageData(appUrl) { + let appPageHtml = await _rp({ url: appUrl }); + let $ = _cheerio.load(appPageHtml); - let searchPageData = []; - for (var searchResult of searchResults) { - let gameData = await getGameDataFromSearchResult(searchResult); - searchPageData.push(gameData); + let firstGame = getMainGameElement($); + if (!firstGame) { + return { + error: true, + message: "Could not find any game elements." + }; } - return searchPageData; -} - + let gameData = getGameDataFromGameElement(firstGame); + let headsets = getHeadsets($); -function stripQueryString(url) { - return url.split(/[?#]/)[0]; + return { + link: appUrl, + ...gameData, + headsets + }; } function getHeadsets($) { @@ -73,27 +90,6 @@ function getMainGameElement($) { return gameElements[0]; } -async function getSearchAppPageData(appUrl) { - let appPageHtml = await _rp({ url: appUrl }); - let $ = _cheerio.load(appPageHtml); - - let firstGame = getMainGameElement($); - if (!firstGame) { - return { - error: true, - message: "Could not find any game elements." - }; - } - - let countdown = getCountdown(firstGame); - let headsets = getHeadsets($); - - return { - countdown, - headsets - }; -} - function getCountdown(gameElement) { let $ = _cheerio.load(gameElement); @@ -131,7 +127,7 @@ async function getGameDataFromSearchResult(searchResult) { let reviewsCount = ""; title = $('div.search_name > span.title').text().trim(); - link = stripQueryString(searchResult.attribs.href); + link = _stringUtils.stripQueryString(searchResult.attribs.href); if (link.includes('/app/')) { type = "APP"; diff --git a/utils/string-utils.js b/utils/string-utils.js new file mode 100644 index 0000000..75c7823 --- /dev/null +++ b/utils/string-utils.js @@ -0,0 +1,7 @@ +function stripQueryString(url) { + return url.split(/[?#]/)[0]; +} + +module.exports = { + stripQueryString +} \ No newline at end of file From 321b21e2374de2fb24ba46800f62a88310df954f Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:36:13 -0500 Subject: [PATCH 12/22] Refactoring --- services/steam-scraper.js | 109 ++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 64 deletions(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 9786d0f..2ebcc31 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -72,13 +72,16 @@ async function getAppPageData(appUrl) { function getHeadsets($) { let headsetTitleElement = $('.details_block.vrsupport > div:contains("Headsets")').parent(); let headsetElements = Array.from(headsetTitleElement.nextUntil('.details_block')); + let headsets = []; + for (var headsetElement of headsetElements) { let headsetName = $('.name', headsetElement).text().trim(); if (headsetName) { headsets.push(headsetName); } } + return headsets; } @@ -93,111 +96,89 @@ function getMainGameElement($) { function getCountdown(gameElement) { let $ = _cheerio.load(gameElement); - let text = ""; - let time = 0; + let countdownData = { + text: "", + time: 0 + } try { - text = $('.game_purchase_discount_countdown').text().trim(); + countdownData.text = $('.game_purchase_discount_countdown').text().trim(); } catch { }; try { let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); - time = parseInt(countdownTimeText); + countdownData.time = parseInt(countdownTimeText); } catch { }; - return { - text, - time - } + return countdownData; } async function getGameDataFromSearchResult(searchResult) { let $ = _cheerio.load(searchResult); - let title = ""; - let link = ""; - let type = "UNKNOWN"; - let price = ""; - let discounted = false; - let originalPrice = ""; - let percentOff = ""; - let reviewsPercent = ""; - let reviewsCount = ""; - - title = $('div.search_name > span.title').text().trim(); - link = _stringUtils.stripQueryString(searchResult.attribs.href); - - if (link.includes('/app/')) { - type = "APP"; - } else if (link.includes('/bundle/')) { - type = "BUNDLE"; + let gameData = { + title: "", + link: "", + type: "UNKNOWN", + originalPrice: "", + price: "", + percentOff: "", + reviewsPercent: "", + reviewsCount: "" } - price = $('div.search_price').clone().children().remove().end().text().trim(); - originalPrice = $('div.search_price > span > strike').text().trim(); - percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()); + gameData.title = $('div.search_name > span.title').text().trim(); + gameData.link = _stringUtils.stripQueryString(searchResult.attribs.href); - if (originalPrice && percentOff) { - discounted = true; + if (gameData.link.includes('/app/')) { + gameData.type = "APP"; + } else if (gameData.link.includes('/bundle/')) { + gameData.type = "BUNDLE"; } - if (type == "APP") { + gameData.price = $('div.search_price').clone().children().remove().end().text().trim(); + gameData.originalPrice = $('div.search_price > span > strike').text().trim(); + gameData.percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()); + + if (gameData.type == "APP") { let reviewsSummary = $('div.search_reviewscore > span.search_review_summary').attr('data-tooltip-html'); if (reviewsSummary) { reviewsSummary = reviewsSummary.trim(); - reviewsPercent = _regexUtils.extractPercent(reviewsSummary); - reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, ''); + gameData.reviewsPercent = _regexUtils.extractPercent(reviewsSummary); + gameData.reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, ''); } } - return { - title, - link, - type, - originalPrice, - discounted, - price, - percentOff, - reviewsPercent, - reviewsCount, - }; + return gameData; } function getGameDataFromGameElement(gameElement) { let $ = _cheerio.load(gameElement); - let title = ""; - let price = ""; - let discounted = false; - let originalPrice = ""; - let percentOff = ""; + let gameData = { + title: "", + originalPrice: "", + price: "", + percentOff: "" + } - title = $('.game_area_purchase_game > h1').children().remove().end().text().trim(); + let title = $('.game_area_purchase_game > h1').children().remove().end().text().trim(); for (var removeKeyword of TITLE_REMOVE) { if (title.startsWith(removeKeyword)) { title = title.substr(removeKeyword.length).trim(); } } + gameData.title = title; - originalPrice = $('.discount_original_price').text().trim(); - percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()); + gameData.originalPrice = $('.discount_original_price').text().trim(); + gameData.percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()); - if (originalPrice && percentOff) { - discounted = true; - } + gameData.price = gameData.originalPrice ? $('.discount_final_price').text().trim() : $('.game_purchase_price').text().trim();; - price = discounted ? $('.discount_final_price').text().trim() : $('.game_purchase_price').text().trim();; - - return { - title, - originalPrice, - discounted, - price, - percentOff - }; + return gameData; } module.exports = { From 0c026cb2ec892bec679e65ce43badc3825b7343b Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:39:18 -0500 Subject: [PATCH 13/22] Reordering --- services/steam-scraper.js | 62 +++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 2ebcc31..ace7d88 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -38,7 +38,7 @@ async function getSearchAppPageData(appUrl) { }; } - let countdown = getCountdown(firstGame); + let countdown = getCountdownFromGameElement(firstGame); let headsets = getHeadsets($); return { @@ -69,6 +69,14 @@ async function getAppPageData(appUrl) { }; } +function getMainGameElement($) { + let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); + if (gameElements.length < 1) { + return; + } + return gameElements[0]; +} + function getHeadsets($) { let headsetTitleElement = $('.details_block.vrsupport > div:contains("Headsets")').parent(); let headsetElements = Array.from(headsetTitleElement.nextUntil('.details_block')); @@ -85,36 +93,6 @@ function getHeadsets($) { return headsets; } -function getMainGameElement($) { - let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); - if (gameElements.length < 1) { - return; - } - return gameElements[0]; -} - -function getCountdown(gameElement) { - let $ = _cheerio.load(gameElement); - - let countdownData = { - text: "", - time: 0 - } - - try { - countdownData.text = $('.game_purchase_discount_countdown').text().trim(); - } catch { }; - - - try { - let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; - let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); - countdownData.time = parseInt(countdownTimeText); - } catch { }; - - return countdownData; -} - async function getGameDataFromSearchResult(searchResult) { let $ = _cheerio.load(searchResult); @@ -181,6 +159,28 @@ function getGameDataFromGameElement(gameElement) { return gameData; } +function getCountdownFromGameElement(gameElement) { + let $ = _cheerio.load(gameElement); + + let countdownData = { + text: "", + time: 0 + } + + try { + countdownData.text = $('.game_purchase_discount_countdown').text().trim(); + } catch { }; + + + try { + let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; + let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); + countdownData.time = parseInt(countdownTimeText); + } catch { }; + + return countdownData; +} + module.exports = { getAppPageData, getSearchAppPageData, From 42131030be95afe16c3e2b3562893f8943e04c9b Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:45:27 -0500 Subject: [PATCH 14/22] App page also gets countdown --- services/steam-scraper.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index ace7d88..1636b6f 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -60,11 +60,13 @@ async function getAppPageData(appUrl) { } let gameData = getGameDataFromGameElement(firstGame); + let countdown = getCountdownFromGameElement(firstGame); let headsets = getHeadsets($); return { link: appUrl, ...gameData, + countdown, headsets }; } From 9a205a5839d364c001b5f78bc2ad414ee1aa8681 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 10:54:47 -0500 Subject: [PATCH 15/22] Safer property setting --- services/steam-scraper.js | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 1636b6f..a2c0d4a 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -109,8 +109,8 @@ async function getGameDataFromSearchResult(searchResult) { reviewsCount: "" } - gameData.title = $('div.search_name > span.title').text().trim(); - gameData.link = _stringUtils.stripQueryString(searchResult.attribs.href); + gameData.title = $('div.search_name > span.title').text().trim() || ""; + gameData.link = _stringUtils.stripQueryString(searchResult.attribs.href) || ""; if (gameData.link.includes('/app/')) { gameData.type = "APP"; @@ -118,17 +118,17 @@ async function getGameDataFromSearchResult(searchResult) { gameData.type = "BUNDLE"; } - gameData.price = $('div.search_price').clone().children().remove().end().text().trim(); - gameData.originalPrice = $('div.search_price > span > strike').text().trim(); - gameData.percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()); + gameData.price = $('div.search_price').clone().children().remove().end().text().trim() || ""; + gameData.originalPrice = $('div.search_price > span > strike').text().trim() || ""; + gameData.percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()) || ""; if (gameData.type == "APP") { let reviewsSummary = $('div.search_reviewscore > span.search_review_summary').attr('data-tooltip-html'); if (reviewsSummary) { reviewsSummary = reviewsSummary.trim(); - gameData.reviewsPercent = _regexUtils.extractPercent(reviewsSummary); - gameData.reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, ''); + gameData.reviewsPercent = _regexUtils.extractPercent(reviewsSummary) || ""; + gameData.reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, '') || ""; } } @@ -151,12 +151,12 @@ function getGameDataFromGameElement(gameElement) { title = title.substr(removeKeyword.length).trim(); } } - gameData.title = title; + gameData.title = title || ""; - gameData.originalPrice = $('.discount_original_price').text().trim(); - gameData.percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()); + gameData.originalPrice = $('.discount_original_price').text().trim() || ""; + gameData.percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()) || ""; - gameData.price = gameData.originalPrice ? $('.discount_final_price').text().trim() : $('.game_purchase_price').text().trim();; + gameData.price = gameData.originalPrice ? $('.discount_final_price').text().trim() || "" : $('.game_purchase_price').text().trim() || ""; return gameData; } @@ -170,14 +170,14 @@ function getCountdownFromGameElement(gameElement) { } try { - countdownData.text = $('.game_purchase_discount_countdown').text().trim(); + countdownData.text = $('.game_purchase_discount_countdown').text().trim() || ""; } catch { }; try { let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); - countdownData.time = parseInt(countdownTimeText); + countdownData.time = parseInt(countdownTimeText) || 0; } catch { }; return countdownData; From 6e9840b83f3486dd1cabc4741863dcb4ce970573 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:02:55 -0500 Subject: [PATCH 16/22] Safer property setting --- services/steam-scraper.js | 70 +++++++++++++++++++++++++++++++-------- utils/regex-utils.js | 2 +- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index a2c0d4a..b3a2aad 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -109,8 +109,15 @@ async function getGameDataFromSearchResult(searchResult) { reviewsCount: "" } - gameData.title = $('div.search_name > span.title').text().trim() || ""; - gameData.link = _stringUtils.stripQueryString(searchResult.attribs.href) || ""; + let title = $('div.search_name > span.title').text().trim(); + if (title) { + gameData.title = title; + } + + let link = _stringUtils.stripQueryString(searchResult.attribs.href); + if (link) { + gameData.link = link; + } if (gameData.link.includes('/app/')) { gameData.type = "APP"; @@ -118,17 +125,35 @@ async function getGameDataFromSearchResult(searchResult) { gameData.type = "BUNDLE"; } - gameData.price = $('div.search_price').clone().children().remove().end().text().trim() || ""; - gameData.originalPrice = $('div.search_price > span > strike').text().trim() || ""; - gameData.percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()) || ""; + let price = $('div.search_price').clone().children().remove().end().text().trim(); + if (price) { + gameData.price = price; + } + + let originalPrice = $('div.search_price > span > strike').text().trim(); + if (originalPrice) { + gameData.originalPrice = originalPrice; + } + + let percentOff = _regexUtils.extractPercent($('div.search_discount > span').text().trim()); + if (percentOff) { + gameData.percentOff = percentOff; + } if (gameData.type == "APP") { let reviewsSummary = $('div.search_reviewscore > span.search_review_summary').attr('data-tooltip-html'); if (reviewsSummary) { reviewsSummary = reviewsSummary.trim(); - gameData.reviewsPercent = _regexUtils.extractPercent(reviewsSummary) || ""; - gameData.reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary).replace(/,/g, '') || ""; + let reviewsPercent = _regexUtils.extractPercent(reviewsSummary); + if (reviewsPercent) { + gameData.reviewsPercent = reviewsPercent; + } + + let reviewsCount = _regexUtils.extractReviewsCount(reviewsSummary); + if (reviewsCount) { + gameData.reviewsCount = reviewsCount; + } } } @@ -151,12 +176,25 @@ function getGameDataFromGameElement(gameElement) { title = title.substr(removeKeyword.length).trim(); } } - gameData.title = title || ""; - gameData.originalPrice = $('.discount_original_price').text().trim() || ""; - gameData.percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()) || ""; + if (title) { + gameData.title = title; + } + + let originalPrice = $('.discount_original_price').text().trim(); + if (originalPrice) { + gameData.originalPrice = originalPrice; + } + + let percentOff = _regexUtils.extractPercent($('.discount_pct').text().trim()); + if (percentOff) { + gameData.percentOff = percentOff; + } - gameData.price = gameData.originalPrice ? $('.discount_final_price').text().trim() || "" : $('.game_purchase_price').text().trim() || ""; + let price = gameData.originalPrice ? $('.discount_final_price').text().trim() : $('.game_purchase_price').text().trim(); + if (price) { + gameData.price = price; + } return gameData; } @@ -170,14 +208,20 @@ function getCountdownFromGameElement(gameElement) { } try { - countdownData.text = $('.game_purchase_discount_countdown').text().trim() || ""; + let text = $('.game_purchase_discount_countdown').text().trim(); + if (text) { + countdownData.text = text; + } } catch { }; try { let countdownScript = $('.game_area_purchase_game > script')[0].children[0].data; let countdownTimeText = _regexUtils.extractDiscountCountdown(countdownScript); - countdownData.time = parseInt(countdownTimeText) || 0; + let time = parseInt(countdownTimeText); + if (time) { + countdownData.time = time; + } } catch { }; return countdownData; diff --git a/utils/regex-utils.js b/utils/regex-utils.js index 5e7035f..8b221ca 100644 --- a/utils/regex-utils.js +++ b/utils/regex-utils.js @@ -12,7 +12,7 @@ function extractPercent(input) { function extractReviewsCount(input) { let match = REVIEWS_COUNT_REGEX.exec(input); if (match) { - return match[1]; + return match[1].replace(/,/g, ''); } } From 5480dd80e16b400832ce8389a9d374ab72c87e51 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:09:47 -0500 Subject: [PATCH 17/22] Fix games with demo --- services/steam-scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index b3a2aad..5a89ee6 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -72,7 +72,7 @@ async function getAppPageData(appUrl) { } function getMainGameElement($) { - let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game')); + let gameElements = Array.from($('#game_area_purchase .game_area_purchase_game:not(.demo_above_purchase)')); if (gameElements.length < 1) { return; } From c9ace2508eade9700d49f9d614bf2642e1fb5705 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:16:35 -0500 Subject: [PATCH 18/22] Reordering --- services/steam-scraper.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/steam-scraper.js b/services/steam-scraper.js index 5a89ee6..d566727 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -99,11 +99,11 @@ async function getGameDataFromSearchResult(searchResult) { let $ = _cheerio.load(searchResult); let gameData = { - title: "", link: "", + title: "", type: "UNKNOWN", - originalPrice: "", price: "", + originalPrice: "", percentOff: "", reviewsPercent: "", reviewsCount: "" @@ -165,8 +165,8 @@ function getGameDataFromGameElement(gameElement) { let gameData = { title: "", - originalPrice: "", price: "", + originalPrice: "", percentOff: "" } From 134dc25c2d4375674f6e2a410aae4d6f2dbbcd4d Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:21:34 -0500 Subject: [PATCH 19/22] Refactoring --- public/scripts/index.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index ba1db22..583bdb6 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -139,12 +139,14 @@ async function retrieveSteamSearchTable() { }; let appData = await post('./api/search-app-scrape', content); - app.headsets = appData.headsets || []; - app.countdown = appData.countdown || { text: "", time: 0 }; + app.headsets = appData.headsets; + app.countdown = appData.countdown; } else { app.headsets = []; - app.countdown.text = ""; - app.countdown.time = 0; + app.countdown = { + text: "", + time: 0 + } } } From 1746009f157cdf316d59e28a9c52c91c86645294 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:35:17 -0500 Subject: [PATCH 20/22] Better way to build link text --- public/scripts/index.js | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index 583bdb6..2cbfe22 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -57,24 +57,17 @@ async function retrieveSteamAppTitle() { try { let appData = await post('./api/app-scrape', content); - let discounted = appData.discounted; - let isVr = appData.headsets.length > 0; - - let link = document.createElement('a'); - if (isVr) { + let text = ""; + if (appData.headsets.length > 0) { let platforms = getPlatformText(appData.headsets); - if (discounted) { - link.innerText = `[${platforms}] ${appData.title} (${appData.price} / ${appData.percentOff} off)`; - } else { - link.innerText = `[${platforms}] ${appData.title} (${appData.price})`; - } - } else { - if (discounted) { - link.innerText = `${appData.title} (${appData.price} / ${appData.percentOff} off)`; - } else { - link.innerText = `${appData.title} (${appData.price})`; - } + text += `[${platforms}] ` } + text += `${appData.title} ` + let priceTag = appData.percentOff ? `(${appData.price} / ${appData.percentOff} off)` : `(${appData.price})`; + text += `${priceTag}` + + let link = document.createElement('a'); + link.innerText = text; link.href = appData.link; link.target = '_blank'; link.style.display = 'inline'; From c9ea40359aacf244792809ec07190eb637cd0149 Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 11:55:55 -0500 Subject: [PATCH 21/22] Refactoring --- public/scripts/index.js | 75 ++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/public/scripts/index.js b/public/scripts/index.js index 2cbfe22..b588fd1 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -125,15 +125,14 @@ async function retrieveSteamSearchTable() { for (let [index, app] of searchData.entries()) { let itemNumber = index + 1; searchResultsDiv.innerHTML = `Retrieving result ${itemNumber} of ${searchData.length}...`; - app.headsets = []; if (app.type == "APP") { let content = { url: app.link }; let appData = await post('./api/search-app-scrape', content); - app.headsets = appData.headsets; - app.countdown = appData.countdown; + app.headsets = appData.headsets || []; + app.countdown = appData.countdown || { text: "", time: 0 }; } else { app.headsets = []; app.countdown = { @@ -162,40 +161,40 @@ async function retrieveSteamSearchTable() { } function formatAppData(app) { - let type = app.type; - let platform = app.headsets.join(', '); - let platformAbbreviated = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); - let title = app.title; - let titleLink = escapePipes(app.title); - let link = app.link; - let price = extractNumberFromPrice(app.price) || app.price || ""; - let originalPrice = extractNumberFromPrice(app.originalPrice) || app.price || ""; - let discounted = app.discounted; - let percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff || ""; - let countdownText = app.countdown.text; - let countdownTime = app.countdown.time; - let reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent || ""; - let reviewsCount = app.reviewsCount || ""; + let formattedData = { + type: "", + platform: "", + platformAbbreviated: "", + title: "", + titleLink: "", + link: "", + price: "", + originalPrice: "", + percentOff: "", + countdownText: "", + countdownTime: 0, + reviews: "", + reviewsCount: "" + } + + formattedData.type = app.type; + formattedData.platform = app.headsets.join(', '); + formattedData.platformAbbreviated = app.headsets.map(platform => getHeadsetAbbreviation(platform)).join('/'); + formattedData.title = app.title; let titlePrefix = app.type == "BUNDLE" ? BUNDLE_PREFIX : ""; - titleLink = `${titlePrefix}[${titleLink}](${link})`; - - return { - type, - platform, - platformAbbreviated, - title, - titleLink, - link, - price, - originalPrice, - discounted, - percentOff, - countdownText, - countdownTime, - reviews, - reviewsCount - } + formattedData.titleLink = `${titlePrefix}[${escapePipes(app.title)}](${app.link})`; + + formattedData.link = app.link; + formattedData.price = extractNumberFromPrice(app.price) || app.price; + formattedData.originalPrice = extractNumberFromPrice(app.originalPrice) || app.price; + formattedData.percentOff = extractNumberFromPercent(app.percentOff) || app.percentOff; + formattedData.countdownText = app.countdown.text; + formattedData.countdownTime = app.countdown.time; + formattedData.reviews = extractNumberFromPercent(app.reviewsPercent) || app.reviewsPercent; + formattedData.reviewsCount = app.reviewsCount; + + return formattedData; } async function retrieveSearchPageData(steamSearchUrl, pageNumber) { @@ -209,15 +208,15 @@ async function retrieveSearchPageData(steamSearchUrl, pageNumber) { } function createMarkdownTable(searchData) { - let header = '| Platform | Title | Price (USD) | Discount (%) | Rating (%) | Review Count | Text | Time |'; - let divider = '| :- | :- | -: | -: | -: | -: | | |'; + let header = '| Platform | Title | Price (USD) | Discount (%) | Rating (%) | Review Count |'; + let divider = '| :- | :- | -: | -: | -: | -: |'; let result = header + NEW_LINE + divider + NEW_LINE; let formattedData = []; for (let app of searchData) { let formatted = formatAppData(app); - result += `| ${formatted.platformAbbreviated} | ${formatted.titleLink} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} | ${formatted.countdownText} | ${formatted.countdownTime}` + NEW_LINE; + result += `| ${formatted.platformAbbreviated} | ${formatted.titleLink} | ${formatted.price} | ${formatted.percentOff} | ${formatted.reviews} | ${formatted.reviewsCount} |` + NEW_LINE; formattedData.push(formatted); } From da024d67d7c47008773790ecb802a3ba506a6c1a Mon Sep 17 00:00:00 2001 From: Kevin Novak Date: Fri, 22 Nov 2019 17:41:09 -0500 Subject: [PATCH 22/22] Download data as CSV --- public/index.html | 2 ++ public/scripts/index.js | 21 +++++++++++++++++++++ services/steam-scraper.js | 6 +++--- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index 698849e..5bcf5c8 100644 --- a/public/index.html +++ b/public/index.html @@ -264,6 +264,8 @@
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous" > + + diff --git a/public/scripts/index.js b/public/scripts/index.js index b588fd1..05246cd 100644 --- a/public/scripts/index.js +++ b/public/scripts/index.js @@ -150,8 +150,17 @@ async function retrieveSteamSearchTable() { textArea.readOnly = true; textArea.innerHTML = text; + let csv = json2csv.parse(cache.searchData); + + let downloadLink = document.createElement('a'); + downloadLink.href = 'data:text/csv;charset=utf-8,' + encodeURI(csv); + downloadLink.target = '_blank'; + downloadLink.innerHTML = 'Download Raw Data as CSV'; + downloadLink.download = `steam-data-${getFormattedTime()}.csv`; + searchResultsDiv.innerHTML = ""; searchResultsDiv.appendChild(textArea); + searchResultsDiv.appendChild(downloadLink); } catch (error) { console.error(error); searchResultsDiv.innerHTML = "No results."; @@ -160,6 +169,18 @@ async function retrieveSteamSearchTable() { retrieveSearchButton.disabled = false; } +function getFormattedTime() { + let today = new Date(); + let y = today.getFullYear(); + // JavaScript months are 0-based. + let m = today.getMonth() + 1; + let d = today.getDate(); + let h = today.getHours(); + let mi = today.getMinutes(); + let s = today.getSeconds(); + return y + "-" + m + "-" + d + "-" + h + "-" + mi + "-" + s; +} + function formatAppData(app) { let formattedData = { type: "", diff --git a/services/steam-scraper.js b/services/steam-scraper.js index d566727..838bdfd 100644 --- a/services/steam-scraper.js +++ b/services/steam-scraper.js @@ -18,7 +18,7 @@ async function getSearchPageData(searchUrl) { let searchResults = Array.from($('#search_resultsRows > a.search_result_row')); let searchPageData = []; - for (var searchResult of searchResults) { + for (let searchResult of searchResults) { let gameData = await getGameDataFromSearchResult(searchResult); searchPageData.push(gameData); } @@ -85,7 +85,7 @@ function getHeadsets($) { let headsets = []; - for (var headsetElement of headsetElements) { + for (let headsetElement of headsetElements) { let headsetName = $('.name', headsetElement).text().trim(); if (headsetName) { headsets.push(headsetName); @@ -171,7 +171,7 @@ function getGameDataFromGameElement(gameElement) { } let title = $('.game_area_purchase_game > h1').children().remove().end().text().trim(); - for (var removeKeyword of TITLE_REMOVE) { + for (let removeKeyword of TITLE_REMOVE) { if (title.startsWith(removeKeyword)) { title = title.substr(removeKeyword.length).trim(); }