From e4f5e185761dcfc77009768fcba6f838579dcd1f Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 12 Feb 2021 14:48:51 +0100 Subject: [PATCH] docs: reading main page name via zimdump info This requires zim-tools 2.2.0 or later License: MIT Signed-off-by: Marcin Rataj --- README.md | 19 +++++++++++++++++-- src/article-transforms.ts | 30 +++++------------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 4f92cc9..7ba328c 100644 --- a/README.md +++ b/README.md @@ -149,13 +149,28 @@ $ node ./bin/run --help First though the main page, as the archive appears on the appropriate wikimedia website, must be determined. For instance, the zim file for Turkish Wikipedia has a main page of `Kullanıcı:The_other_Kiwix_guy/Landing` but `https://tr.wikipedia.org` uses `Anasayfa` as the main page. Both must be passed to the node script. -To determine the website main page use `./tools/find_main_page_name.sh` passing the website: +To determine the original main page use `./tools/find_main_page_name.sh`: -```sh +```console $ ./tools/find_main_page_name.sh tr.wikiquote.org Anasayfa ``` +To determine the main page in the ZIM file, open it in a [Kiwix reader](https://www.kiwix.org/en/kiwix-reader) or use `zimdump info` (version 2.2.0 or later) and ignore the `A/` prefix: + +```console +$ zimdump info wikipedia_tr_all_maxi_2021-01.zim +count-entries: 1088190 +uuid: 840fc82f-8f14-e11e-c185-6112dba6782e +cluster count: 5288 +checksum: 50113b4f4ef5ddb62596d361e0707f79 +main page: A/Kullanıcı:The_other_Kiwix_guy/Landing +favicon: -/favicon + +$ zimdump info wikipedia_tr_all_maxi_2021-01.zim | grep -oP 'main page: A/\K\S+' +Kullanıcı:The_other_Kiwix_guy/Landing +``` + The conversion is done on the unpacked zim directory: ```sh diff --git a/src/article-transforms.ts b/src/article-transforms.ts index e8fade3..a561853 100644 --- a/src/article-transforms.ts +++ b/src/article-transforms.ts @@ -43,32 +43,8 @@ export const appendFooter = ($html: 
any, options: EnhancedOpts) => { } export const appendHtmlPostfix = (href: string) => { + // noop: .html no longer needed since we switched to zimdump return href - /* .html no longer needed since we switched to zimdump - if (href.includes('/w/index.php')) { - return href - } - - const parts = href.split(/[#?]+/) - - if (parts.length === 0) { - throw new Error('Unexpected parsing of links') - } - - if (parts.length === 1) { - if (href.endsWith('.html')) { - return href - } - - return `${href}.html` - } - - if (parts[0].endsWith('.html')) { - return href - } - - return href.replace(parts[0], `${parts[0]}.html`) - */ } export const prefixRelativeRoot = (href: string) => { @@ -83,6 +59,10 @@ export const moveRelativeLinksUpOneLevel = (href: string) => { return href.replace('../', '') } +export const moveRelativeLinksDownOneLevel = (href: string) => { + return href.replace('../', '../../') +} + export const makeScriptLinksRelativeToWiki = (href: string) => { if (!href.startsWith('-/')) { return href