Skip to content

Commit

Permalink
feat: implement pagination feature in inquirer with turning pages
Browse files Browse the repository at this point in the history
[p] [page_num] now enables users to navigate through pages efficiently.

feat: add LianHuanHua Zhihu/Toutiao scraping
  • Loading branch information
woniuzfb committed Mar 17, 2024
1 parent dc85994 commit 60b29b7
Show file tree
Hide file tree
Showing 12 changed files with 1,963 additions and 250 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ wget https://woniuzfb.github.io/iptv/iptv.sh && bash iptv.sh
./debug lhh
```

- 1w+ 某某分享 (知乎 + 头条)
- 某某全站 (号称7成连环画?)

## Dev

v2.0.0 broken atm
Expand Down
2 changes: 1 addition & 1 deletion build
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ReplaceInclude()
echo "${2:-}if [ \"\$self\" == \"tv\" ] || [ \"\$self\" == \"iptv\" ]"
echo "${2:-}then"
ReplaceInclude src/tv " ${2:-}"
bins=(v2 x nx or pve arm ibm cf cx ali)
bins=(v2 x nx or pve arm ibm cf cx ali lhh)
for bin in "${bins[@]}"
do
echo "${2:-}elif [ \"\$self\" == \"$bin\" ]"
Expand Down
10 changes: 5 additions & 5 deletions core
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ SetDelimiters()

RandStr()
{
str_size=8
str_array=(
local str_size=${1:-8}
local str_array=(
q w e r t y u i o p a s d f g h j k l z x c v b n m
Q W E R T Y U I O P A S D F G H J K L Z X C V B N M
)
str_array_size=${#str_array[*]}
str_len=0
rand_str=""
local str_array_size=${#str_array[*]}
local str_len=0
local rand_str="" str_index
while [[ $str_len -lt $str_size ]]
do
str_index=$((RANDOM%str_array_size))
Expand Down
14 changes: 11 additions & 3 deletions env
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,15 @@ LOG_FILE="$HOME"/iptv.log
JQ_FILE=/usr/local/bin/jq
FFMPEG_FILE=/usr/local/bin/ffmpeg
FFPROBE_FILE=/usr/local/bin/ffprobe
FFMPEG_ROOT="$HOME"
CURL_IMPERSONATE_FILE=/usr/local/bin/curl-impersonate

# FFmpeg
FFMPEG_ROOT="$HOME"

# node
NODE_ROOT="$HOME"/iptv_sh_node
NODE_TOUTIAO_LINK=https://raw.githubusercontent.com/woniuzfb/iptv/main/scripts/toutiao

# iptv
IPTV_ROOT=/usr/local/iptv
FFMPEG_LOG_ROOT="$IPTV_ROOT"/ffmpeg
Expand All @@ -45,7 +51,6 @@ LOGROTATE_CONFIG="$IPTV_ROOT"/logrotate
CRON_FILE="$IPTV_ROOT"/cron
XTREAM_CODES="$IPTV_ROOT"/xtream_codes
XTREAM_CODES_EXAM="$IPTV_ROOT"/xtream_codes_exam
NODE_ROOT="$IPTV_ROOT"/node
IP_DENY="$IPTV_ROOT"/ip.deny
IP_LOG="$IPTV_ROOT"/ip.log
LIVE_ROOT="$IPTV_ROOT"/live
Expand All @@ -62,7 +67,7 @@ MD5SUM_LINK_FALLBACK="$FFMPEG_MIRROR_LINK"/md5sum.c
DEFAULT_DEMOS=default.json
TS_CHANNELS=channels.json
XTREAM_CODES_CHANNELS=xtream_codes
USER_AGENT_BROWSER="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36"
USER_AGENT_BROWSER="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
USER_AGENT_TV="Mozilla/5.0 (QtEmbedded; U; Linux; C) AppleWebKit/533.3 (KHTML, like Gecko) MAG200 stbapp ver: 2 rev: 250 Safari/533.3"
USER_AGENT_PHONE="iPhone; CPU iPhone OS 15_2 like Mac OS X"

Expand Down Expand Up @@ -95,3 +100,6 @@ IBM_CONFIG="$HOME"/ibm.json

# Alist
ALIST_FILE=/usr/local/bin/ali

# LianHuanHua
LIANHUANHUA_FILE=/usr/local/bin/lhh
210 changes: 210 additions & 0 deletions scripts/toutiao/browser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
const { chromium, devices } = require('playwright-chromium');
const desktop = devices["Desktop Chrome"];

// Command-line inputs: argv[2] is the link (or plain string) to sign and
// argv[3] is a JSON-encoded cookie list. Both are optional; signing is only
// attempted when cookies are supplied.
const url = process.argv[2];
const cookies = process.argv[3];

// Resource types that the request router aborts instead of fetching.
const RESOURCE_EXCLUSIONS = ['image', 'stylesheet', 'media', 'font'];

// Request URLs that the request router aborts. Dots in host names are escaped
// so that only the literal hosts match (an unescaped "." matches any character).
const URL_EXCLUSIONS = [
  /zijieapi/,
  /log-sdk/,
  /secsdk-captcha/,
  /ibytedapm\.com\/slardar\/fe\/sdk-web\/plugins/,
  /www\.toutiao\.com\/ttwid\/check/,
  /www\.toutiao\.com\/ttwid\/report_fingerprint/,
  /helpdesk\.bytedance\.com/,
];

/** Stateless helper functions used by the signer. */
class Utils {
  /**
   * Returns a pseudo-random integer between the two bounds, both inclusive.
   * The bounds may be passed in either order.
   *
   * @param {number} a - One bound.
   * @param {number} b - The other bound.
   * @returns {number} An integer in the closed range spanned by a and b.
   */
  static getRandomInt(a, b) {
    const lower = Math.min(a, b);
    const span = Math.floor(Math.max(a, b) - lower + 1);
    return lower + Math.floor(Math.random() * span);
  }

  /**
   * Reports whether the value can be parsed by the URL constructor.
   *
   * @param {string} url - Candidate URL string.
   * @returns {boolean} true when parsing succeeds, false when it throws.
   */
  static isUrl(url) {
    let parses = true;
    try {
      new URL(url);
    } catch {
      parses = false;
    }
    return parses;
  }
}

/**
 * Opens a Toutiao profile page in Chromium, captures the feed and getInfo
 * requests the page issues, and uses the page's own `byted_acrawler` object to
 * produce `_signature` and X-Bogus values.
 */
class Signer {
  userAgent =
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
  args = [
    "--disable-blink-features",
    "--disable-blink-features=AutomationControlled",
    "--disable-infobars",
    "--window-size=1920,1080",
    "--start-maximized",
  ];

  default_url = "https://www.toutiao.com/c/user/token/MS4wLjABAAAAB2ev2na9xNHI8py8dnKvLnSYGkxaFKDtOQTqbR7nsEzpNhwVqpCY8ZZz6s7qA8vt/";

  /**
   * @param {string} [default_url] - Page to load in init(); falls back to the built-in profile URL.
   * @param {string} [userAgent] - User agent to present; falls back to the built-in one.
   * @param {object} [browser] - Already-launched browser to reuse. When given,
   *   close() leaves it running because this instance does not own it.
   */
  constructor(default_url, userAgent, browser) {
    if (default_url) {
      this.default_url = default_url;
    }
    if (userAgent) {
      this.userAgent = userAgent;
    }

    if (browser) {
      this.browser = browser;
      this.isExternalBrowser = true;
    }

    this.args.push(`--user-agent="${this.userAgent}"`);

    this.options = {
      headless: true,
      args: this.args,
      ignoreDefaultArgs: ["--mute-audio", "--hide-scrollbars"],
      ignoreHTTPSErrors: true,
    };
  }

  /**
   * Launches the browser if needed, loads the default URL and installs the
   * in-page signing helpers.
   *
   * @returns {Promise<{feed: {url: string, headers: object}, getinfo: {url: string, headers: object}}>}
   *   URL and headers of the captured feed and getInfo requests.
   *   On any failure the error is written to stderr and the process exits with status 1.
   */
  async init() {
    try {
      if (!this.browser) {
        this.browser = await chromium.launch(this.options);
      }

      const emulateTemplate = {
        ...desktop,
        locale: "en-US",
        isMobile: false,
        hasTouch: false,
        userAgent: this.userAgent,
        // Build a fresh viewport object: the spread above is shallow, so
        // assigning into emulateTemplate.viewport would modify the shared
        // device descriptor itself.
        viewport: {
          ...desktop.viewport,
          width: Utils.getRandomInt(1280, 1920),
          height: Utils.getRandomInt(1280, 1920),
        },
      };

      // NOTE(review): `devtools` looks like a launch option rather than a
      // context option - confirm whether it has any effect here.
      this.context = await this.browser.newContext({
        bypassCSP: true,
        devtools: true,
        ...emulateTemplate,
      });

      this.page = await this.context.newPage();

      // Abort heavy resources and excluded endpoints; let everything else through.
      await this.page.route("**/*", (route) => {
        const request = route.request();
        const isExcluded = URL_EXCLUSIONS.some((regex) => regex.test(request.url()));
        if (RESOURCE_EXCLUSIONS.includes(request.resourceType()) || isExcluded) {
          return route.abort();
        }
        return route.continue();
      });

      // Start waiting before navigation so requests fired during page load are not missed.
      const getinfoRequestPromise = this.page.waitForRequest('https://xxbg.snssdk.com/websdk/v1/getInfo*');
      const feedRequestPromise = this.page.waitForRequest('https://www.toutiao.com/api/pc/list/user/feed?category=profile_all*');

      await this.page.goto(this.default_url, {
        waitUntil: "networkidle",
      });

      const getinfoRequest = await getinfoRequestPromise;
      const feedRequest = await feedRequestPromise;

      // Runs inside the page: expose thin wrappers around the page's own signer object.
      await this.page.evaluate(() => {
        window.generateSignature = function generateSignature(url, string) {
          if (typeof window.byted_acrawler?.sign !== "function") {
            throw new Error("No signature function found");
          }
          return string ? window.byted_acrawler.sign("", string) : window.byted_acrawler.sign({ url: url });
        };

        window.generateBogus = function generateBogus(params) {
          if (typeof window.byted_acrawler?.generateBogus !== "function") {
            throw new Error("No X-Bogus function found");
          }
          return window.byted_acrawler.generateBogus(params);
        };
      });

      const LOAD_SCRIPTS = ["xbogus.js"];
      await Promise.all(LOAD_SCRIPTS.map(async (script) => {
        await this.page.addScriptTag({
          path: `${__dirname}/${script}`,
        });
      }));

      return {
        feed: {
          url: feedRequest.url(),
          headers: feedRequest.headers(),
        },
        getinfo: {
          url: getinfoRequest.url(),
          headers: getinfoRequest.headers(),
        },
      };
    } catch (error) {
      // Report the cause before exiting so the failure is not silent.
      console.error(error);
      process.exit(1);
    }
  }

  /**
   * Adds the cookies to the browser context and signs `link` inside the page.
   *
   * The functions handed to page.evaluate() execute in the page, not in Node;
   * `generateSignature` and `generateBogus` are page globals. Inputs are passed
   * as evaluate() arguments rather than spliced into script text, so quotes or
   * backslashes in them cannot alter the evaluated code.
   *
   * @param {string} link - A URL to sign, or any other string to sign as-is.
   * @param {Array<object>} cookies - Cookies forwarded to context.addCookies().
   * @returns {Promise<object>} For a URL: `{ signature, "x-bogus", signed_url }`;
   *   otherwise `{ signature }`.
   */
  async sign(link, cookies) {
    await this.context.addCookies(cookies);

    if (!Utils.isUrl(link)) {
      const signature = await this.page.evaluate(
        (target) => generateSignature(target),
        link,
      );
      return { signature };
    }

    const token = await this.page.evaluate(
      ([target, text]) => generateSignature(target, text),
      ["", link],
    );
    // Start a query string when the link has none instead of blindly appending "&".
    const separator = link.includes("?") ? "&" : "?";
    const signed_url = `${link}${separator}_signature=${token}`;
    const queryString = new URL(signed_url).searchParams.toString();
    const bogus = await this.page.evaluate(
      ([params, agent]) => generateBogus(params, agent),
      [queryString, this.userAgent],
    );
    return {
      signature: token,
      "x-bogus": bogus,
      signed_url,
    };
  }

  /**
   * Releases what this instance created: always the browser context, and the
   * browser too unless it was supplied by the caller.
   */
  async close() {
    if (this.context) {
      await this.context.close();
      this.context = null;
    }
    if (this.browser && !this.isExternalBrowser) {
      await this.browser.close();
      this.browser = null;
    }
    this.page = null;
  }
}

/**
 * CLI entry point. Initialises a Signer, optionally signs the link from
 * argv[2] using the JSON cookie list from argv[3], and prints one JSON object
 * of the form {"status":"ok","data":{...}} on stdout.
 *
 * On failure the error goes to stderr and the exit code is set to 1. The
 * signer is closed in every case, so a failure does not leave the browser
 * running.
 */
(async function main() {
  let signer;
  try {
    signer = new Signer();
    const init = await signer.init();
    // Cookies are optional; without them only the captured request info is emitted.
    const sign = cookies ? await signer.sign(url, JSON.parse(cookies)) : {};

    console.log(
      JSON.stringify({
        status: "ok",
        data: {
          ...init,
          ...sign,
        },
      }),
    );
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    try {
      await signer?.close();
    } catch (closeErr) {
      console.error(closeErr);
      process.exitCode = 1;
    }
  }
})();
Loading

0 comments on commit 60b29b7

Please sign in to comment.