Skip to content

Commit

Permalink
Deprecate using transcript page as file naming source
Browse files Browse the repository at this point in the history
Because of the exploit mentioned in 0f595a7,
file and folder name generation has been switched to use the data provided in the course info.

Transcripts cannot be retrieved without logging in, which breaks the transcript-based process.

For now, downloading the slides ("exercise files") is still being looked into.
  • Loading branch information
kwongtn committed Aug 1, 2020
1 parent 84a8cf5 commit e44e61e
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 195 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ or [![Buy me a coffee via Grab?](https://img.shields.io/badge/-Buy%20me%20a%20co
- Check `preserve log` and `disable cache`.
- Clear the current captured data.
1. In the course page, refresh the page with the description. You should see requests appearing in the Developer Tools network tab. You have now captured the data for course information output.
1. Navigate to the course "transcript" page. You have now captured data for transcript output.
1. You may now export the HAR file and close the Developer Tools window.
1. Run the program with the following command:
```
Expand All @@ -55,7 +54,7 @@ __[Method 2, untested for resiliency]__ Continuing from previous section (Gettin
```
node ./main.js --videoDownload path_to_HAL_file
```
1. An output of all the video URLs will be in the `./output/videoList.json` file, and if you specified the `--videoDownload` parameter, videos will be downloaded alongside the subtitle files.
1. An output of all the video URLs will be in the `./output/URLs.json` file, and if you specified the `--videoDownload` parameter, videos will be downloaded alongside the subtitle files.
1. <strike> Copy the links and paste into any downloader (or browser window) to download the videos. Do note that you would need to manually rename the files.</strike>
Expand Down
84 changes: 0 additions & 84 deletions functions/converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,35 +33,6 @@ function secsConvert(secs) {
return myObject;
}

/**
 * Converts an array of transcript lines into SubRip (SRT) text.
 *
 * Every line must provide `seconds` (numeric sort key), `text`, and
 * `startTime` / `endTime` objects with `hours`, `minutes` and `seconds`
 * members. The `seconds` member of a time must be a string, because its
 * decimal point is rewritten to the comma that SRT timestamps use.
 *
 * @param {Array<Object>} transcript Transcript lines, in any order.
 * @return {Promise<string>} Resolves with the SRT text ("" for an empty
 *     transcript); rejects if a line is missing one of the fields above.
 */
function objToSRT(transcript) {
  return new Promise((resolve) => {
    const formatTime = (time) =>
      time.hours + ":" + time.minutes + ":" + time.seconds.replace(".", ",");

    // Sort a copy so the caller's array keeps its original order.
    const ordered = [...transcript].sort((a, b) => a.seconds - b.seconds);

    const cues = ordered.map((line, index) => {
      // SRT cue counters are 1-based.
      const counter = index + 1;
      return counter + "\n" +
        formatTime(line.startTime) + " --> " + formatTime(line.endTime) + "\n" +
        " " + line.text + "\n\n";
    });

    resolve(cues.join(""));
  });
}

/**
* @param {number} num
* @return {Promise<string>}
Expand All @@ -71,58 +42,3 @@ module.exports.secsConvert = (num) => {
resolve(secsConvert(num));
});
}

/**
 * Exported entry point for the module-private objToSRT.
 * @param {Array<Object>} obj Transcript lines to convert.
 * @return {Promise} Settles the same way the promise returned by objToSRT does.
 */
module.exports.objToSRT = async (obj) => objToSRT(obj);

/**
 * Builds the list of subtitle lines for one transcript item.
 *
 * A segment starts at its own `displayTime` and ends where the next segment
 * starts. The last segment has no successor, so its end time is its start
 * time plus 3 seconds.
 *
 * The returned promise resolves only after every line has been built, so
 * callers always receive the complete list.
 *
 * @param {Object} item Transcript item; `item.segments` is an array whose
 *     entries provide `displayTime` (seconds) and `text`.
 * @return {Promise<Array<Object>>} Resolves with one
 *     `{startTime, endTime, seconds, text}` object per segment, in segment
 *     order; rejects if `item.segments` cannot be iterated.
 */
async function transcriptToArr(item) {
  const LAST_SEGMENT_SECONDS = 3;
  const segments = item.segments;

  return Promise.all(segments.map((segment, index) => {
    const isLast = index + 1 === segments.length;
    const endSeconds = isLast
      ? segment.displayTime + LAST_SEGMENT_SECONDS
      : segments[index + 1].displayTime;

    // Promise.all accepts plain values as well as promises, so this works
    // whether secsConvert returns its result directly or asynchronously.
    return Promise.all([
      secsConvert(segment.displayTime),
      secsConvert(endSeconds),
    ]).then(([startTime, endTime]) => ({
      "startTime": startTime,
      "endTime": endTime,
      "seconds": segment.displayTime,
      "text": segment.text
    }));
  }));
}

/**
 * Exported entry point for the module-private transcriptToArr.
 * @param {Object} item Transcript item to convert.
 * @return {Promise} Settles the same way the promise returned by transcriptToArr does.
 */
module.exports.transcriptToArr = async (item) => transcriptToArr(item);
81 changes: 81 additions & 0 deletions functions/fileLister.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
const fs = require('fs');
const converter = require('./converter.js');
const func = require('./functions.js');

/**
 * Makes a title usable as a single file or folder name.
 *
 * Colons are replaced with hyphens; the other characters that Windows does
 * not allow in file names (? " * < > | / \) are removed.
 *
 * @param {string} fileName Raw title.
 * @return {string} The sanitized name.
 */
function fileNameSanitizer(fileName) {
  // NOTE(review): one pattern in the previous replace chain was an empty
  // regex literal (`//g`), which starts a line comment and made the function
  // a syntax error. It is assumed to have targeted another reserved
  // character, so the whole Windows-reserved set is stripped here - confirm.
  return fileName
    .replace(/:/g, "-")
    .replace(/[?"*<>|\/\\]/g, "");
}

/**
 * Builds the extension-less output path of every clip in a course and makes
 * sure the course folder and one sub-folder per module exist.
 *
 * Module numbers in the paths are 1-based, clip numbers are 0-based; both are
 * formatted with func.numString. Folder creation failures are logged and do
 * not stop path generation.
 *
 * @param {Object} courseInfo Course information; reads `title` and
 *     `modules[]`, each with `title` and `clips[].title`.
 * @return {Promise<Array<string>>} Resolves with one path per clip, in course
 *     order; rejects if `courseInfo` does not have the expected shape.
 */
async function generatePaths(courseInfo) {
  // Marker inserted between the module folder name and the file name in every
  // returned path. NOTE(review): presumably consumers split on it - confirm.
  const FOLDER_KEY = "11B42C394C6217C5135BF7E4AC23E";

  // Creates a folder when it is missing; a failure is only logged.
  const ensureFolder = (path) => {
    if (fs.existsSync(path)) {
      return;
    }
    try {
      fs.mkdirSync(path);
    } catch (err) {
      console.log(err.message);
    }
  };

  // The course folder is the same for every module, so handle it once.
  const courseFolder = "./output/" + fileNameSanitizer(courseInfo.title);
  ensureFolder(courseFolder);

  const videoList = [];
  courseInfo.modules.forEach((courseModule, index) => {
    const moduleNumber = func.numString(index + 1);
    const moduleFolder = courseFolder + "/" + moduleNumber +
      " - " + fileNameSanitizer(courseModule.title);
    ensureFolder(moduleFolder);

    courseModule.clips.forEach((clip, clipIndex) => {
      videoList.push(
        moduleFolder + FOLDER_KEY + "/" +
        moduleNumber + "." + func.numString(clipIndex) +
        " - " + fileNameSanitizer(clip.title)
      );
    });
  });

  return videoList;
}

/**
 * Generates the output path of every clip and writes the list to
 * ./output/videoList.json.
 * @param {Object} courseInfo Course information, passed through to generatePaths.
 * @return {Promise<Array<string>>} Resolves with the path list after the file
 *     has been written; rejects if path generation or the write fails.
 */
module.exports.generatePaths = (courseInfo) => {
  // Chain on the existing promise so failures reach the caller.
  return generatePaths(courseInfo).then((videoList) => {
    fs.writeFileSync("./output/videoList.json", JSON.stringify(videoList, null, 2));
    return videoList;
  });
};
97 changes: 0 additions & 97 deletions functions/transcript.js

This file was deleted.

23 changes: 11 additions & 12 deletions main.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const transcript = require("./functions/transcript.js");
const courseInfo = require("./functions/courseinfo.js");
const paramProcessor = require("./functions/paramProcessor.js");
const video = require("./functions/video.js");
const filelister = require("./functions/filelister.js");
const exec = require("child_process").execSync;
const fs = require("fs");

Expand All @@ -28,26 +28,25 @@ if (!fs.existsSync("./output")) {
}
}

// Generate video list
if (!params.videoDownload) {
  // URL of the request whose response body is parsed and handed to
  // filelister.generatePaths as the course info.
  const searchString = /https:\/\/app\.pluralsight\.com\/learner\/content\/courses.*/;
  var obtainedVideoList = false;
  try {
    myJSON.log.entries.forEach((element) => {
      // Only the first matching entry is used; the guard reads the same flag
      // that is set below.
      if (!obtainedVideoList && searchString.test(element.request.url)) {
        const passedJSON = JSON.parse(element.response.content.text);
        // Path generation is asynchronous: report a failure here, because the
        // surrounding try/catch only sees synchronous errors.
        filelister.generatePaths(passedJSON).catch((err) => {
          console.log(err.message);
        });

        obtainedVideoList = true;
      }
    });
  } catch (err) {
    console.log(err.message);
    console.log("If you see this its probably because the HAR file you provided does not have course info, or that the format has changed.");
    console.log("If you are sure that the format has changed, please attach your HAR file and open an issue here: https://github.com/kwongtn/CourseExtractor/issues");
  }

}

// Generate and write courseInfo to output
Expand Down

0 comments on commit e44e61e

Please sign in to comment.