Skip to content

Commit

Permalink
Follow meta-refresh redirect
Browse files Browse the repository at this point in the history
  • Loading branch information
Giorgio Premi committed Jul 31, 2017
1 parent 5c2477f commit 494d000
Showing 1 changed file with 98 additions and 2 deletions.
100 changes: 98 additions & 2 deletions stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,106 @@ page.viewportSize = {
page.customHeaders = opts.headers || {};
page.zoomFactor = opts.scale;

page.open(opts.url, function (status) {
// Redirect-loop detection state, kept across invocations of pageOnLoadFinished.
var previousCount = 0; // how many times in a row a load finished on the URL stored in previousPage
var previousPage = null; // URL recorded the last time a load finished on a different page

function pageOnLoadFinished(status) {
// A failed load is fatal: report the requested URL and exit with status 1.
if (status === 'fail') {
console.error('Couldn\'t load url: ' + opts.url);
phantom.exit(1);
return;
}

// Redirect-loop guard: count consecutive loads that end on the same URL and
// abort once that count exceeds 3; landing on a different URL resets the count.
if (previousPage === page.url) {
if (++ previousCount > 3) {
console.error('Loop detected: ' + page.url);
phantom.exit(1);
return;
}
} else {
previousPage = page.url;
previousCount = 0;
}

// Largest meta refresh delay that will be followed; passed into the page context below.
var threshold = 10;
var refresh = page.evaluate(function(threshold) {
// Shape of a meta refresh "content" value: a delay made of digits, optionally
// followed by ";" and a target, where the "url=" prefix itself is optional.
var patternMetaContent = /^\s*(\d+)(?:\s*;(?:\s*url\s*=)?\s*(.+)?)?$/i;

/**
 * Parse the "content" value of a <meta http-equiv="refresh"> tag.
 * base code from https://github.com/stevenvachon/http-equiv-refresh
 *
 * @param {string} content Raw value of the content attribute.
 * @returns {{timeout: ?number, url: ?string}} The delay and the target URL.
 *     `timeout` is null when the value cannot be parsed at all; `url` is null
 *     when no target (or only an empty / empty-quoted target) is given.
 */
var parseMetaRefresh = function (content) {
    var result = { timeout: null, url: null };
    var match = patternMetaContent.exec(content);

    if (match === null) {
        return result;
    }

    // Group 1 is mandatory in the pattern, so it is always set on a match.
    // The explicit radix keeps values such as "010" decimal on every engine.
    result.timeout = parseInt(match[1], 10);

    if (match[2] !== undefined) {
        var url = String(match[2]).trim();
        var firstChar = url.charAt(0);
        var lastChar = url.charAt(url.length - 1);
        var isQuoted = (firstChar === "'" || firstChar === '"') && firstChar === lastChar;

        // Remove a single level of encapsulating quotes. A target that is
        // nothing but quotes collapses to the empty string, i.e. "no URL".
        if (isQuoted) {
            url = url.slice(1, -1).trim();
        }

        if (url.length) {
            result.url = url;
        }
    }

    return result;
};

var metas = [];
var tags = document.head.querySelectorAll('[http-equiv="refresh"]');
for (var i = 0, len = tags.length; i < len; ++ i) {
if (tags[i].tagName === 'META') {
metas.push(tags[i].content || tags[i].CONTENT);
}
};

var refresh = null;
var minTime = Number.POSITIVE_INFINITY;
var i = 0;

for (var i = 0, len = metas.length; i < len; ++ i) {
var currRefresh = parseMetaRefresh(metas[i]);

if (currRefresh.timeout <= threshold && currRefresh.timeout < minTime) {
minTime = currRefresh.timeout;
refreshUrl = currRefresh; // currRefresh.url could be null
}
}

return refreshUrl;
}, threshold);

// A non-null result means the page asked for a refresh: stop here and let the
// next load-finished callback take over instead of continuing below.
if (null !== refresh) {
// Make sure this handler is the one invoked when the next load finishes.
page.onLoadFinished = pageOnLoadFinished;

if (refresh.timeout > 0) { // a timeout of 0 is apparently already followed by phantom itself
page.evaluate(function (refresh) {
// Without a target URL the refresh is a plain reload of the current page.
if (null === refresh.url) {
window.location.reload();
} else {
window.location.replace(refresh.url);
}
}, refresh);
}
return;
}

if (opts.crop) {
page.clipRect = {
top: 0,
Expand Down Expand Up @@ -124,4 +217,7 @@ page.open(opts.url, function (status) {
log.call(console, page.renderBase64(opts.format));
phantom.exit();
}, opts.delay * 1000);
});
}

// Register the load handler first, then start loading the requested URL.
page.onLoadFinished = pageOnLoadFinished;
page.open(opts.url);

0 comments on commit 494d000

Please sign in to comment.