diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 00000000..211e7900 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,65 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy static content to Pages + +on: + # Runs on pushes targeting the branch + push: + branches: + - master + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.12"] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: master + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + run: | + python -m pip install . 
&& + python -m pip install -r scripts/requirements.txt + + - name: Build static site contents + run: | + python scripts/build.py + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: '_public' + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 2456f416..52fc8ce2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ __pycache__/ build/ dist/ *.egg-info/ +_public/ # Compiled dictionary *.list diff --git a/README.md b/README.md index d7c788fe..63a31530 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,14 @@ converter.convert_file(input=None, output=None */ ``` +## 線上版 + +[簡繁祕書線上版](https://danny0838.github.io/sts-lib/) + +本線上轉換工具支援文字轉換及檔案轉換。前者只要在輸入區填入文字,就會自動轉換並且可以互動式校訂。後者可以用按鈕或拖放選擇一或多個檔案,就會逐一轉換後自動下載。預設檔案輸入輸出編碼皆是UTF-8,如要輸入其他編碼的檔案,可在進階選項設定。 + +目前內建 [OpenCC](https://github.com/BYVoid/OpenCC) 的轉換方案,並且修正了 OpenCC 演算法缺陷導致一些地區詞無法正常轉換的問題(詳見[相關問題回報](https://github.com/BYVoid/OpenCC/issues/475))。未來有機會再擴充,如中文維基百科的轉換詞庫。 + ## License 許可協議 本專案以 Apache License 2.0 協議授權使用。 diff --git a/scripts/build.py b/scripts/build.py index e162bc46..dc0743ed 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -41,6 +41,40 @@ def main(): tpl = env.get_template('index_single.html') render_on_demand(file, tpl, single_page=True) + # build static site contents under PUBLIC_DIR + www_dir = os.path.join(root_dir, PUBLIC_DIR) + os.makedirs(www_dir, exist_ok=True) + + file = os.path.join(www_dir, 'index.html') + tpl = env.get_template('index.html') + render_on_demand(file, tpl) + + file = os.path.join(www_dir, 'index.css') + tpl = env.get_template('index.css') + render_on_demand(file, tpl) + + file = os.path.join(www_dir, 'index.js') + tpl = env.get_template('index.js') + render_on_demand(file, tpl) + + file = os.path.join(www_dir, 'sts.js') + tpl = env.get_template('sts.js') + render_on_demand(file, tpl) + + # -- compile *.tlist + 
dicts_dir = os.path.join(www_dir, 'dicts', 'opencc') + os.makedirs(dicts_dir, exist_ok=True) + maker = StsMaker() + config_files = os.path.join(glob.escape(StsMaker.config_dir), '[!_]*.json') + for config_file in glob.iglob(config_files): + file = maker.make(config_file, quiet=True) + basename = os.path.basename(file) + dest = os.path.join(dicts_dir, basename) + + if not os.path.isfile(dest) or os.path.getmtime(file) > os.path.getmtime(dest): + print(f'updating: {dest}') + shutil.copyfile(file, dest) + if __name__ == '__main__': main() diff --git a/sts/data/htmlpage/index.css b/sts/data/htmlpage/index.css index 9f850102..f54f7c62 100644 --- a/sts/data/htmlpage/index.css +++ b/sts/data/htmlpage/index.css @@ -30,3 +30,8 @@ body > footer { margin-top: 1em; text-align: center; font-size: small; } .popup a { padding: .3em; cursor: pointer; } .popup a:hover { background-color: #ccc; } .popup a:not([tabindex="0"])::before { content: attr(tabindex) "."; } + +{%- if not single_page %} +#panel section { margin: .5em auto; } +#panel textarea { width: 100%; height: 30vh; min-height: 120px; box-sizing: border-box; } +{%- endif %} diff --git a/sts/data/htmlpage/index.html b/sts/data/htmlpage/index.html index 73b5f653..9954612e 100644 --- a/sts/data/htmlpage/index.html +++ b/sts/data/htmlpage/index.html @@ -4,16 +4,64 @@ {%- block head %} -{% block title %}{% endblock title %} +{% block title %}簡繁祕書線上版 v2.0.0{% endblock title %} {%- block styles %} + {%- endblock styles %} {%- block scripts %} + + {%- endblock scripts %} {%- endblock head %} {%- block viewer %} + {%- endblock viewer %} +{%- block panel %} +
+
+
+ +
+
+ + + + + + + + +
+
+
+{%- endblock panel %} {%- block help %}
操作說明 @@ -44,6 +92,8 @@

操作鍵

{%- endblock help %} diff --git a/sts/data/htmlpage/index.js b/sts/data/htmlpage/index.js index 14505550..531fdf26 100644 --- a/sts/data/htmlpage/index.js +++ b/sts/data/htmlpage/index.js @@ -624,3 +624,231 @@ document.addEventListener('DOMContentLoaded', (event) => { const target = viewer.querySelector('a.unchecked'); if (target) { target.focus(); } }); + +{%- if not single_page %} + +const excludeRegexPattern = /^\/(.*)\/([a-z]*)$/; + +function parseExcludePattern(exclude) { + if (!exclude) { return null; } + const m = excludeRegexPattern.exec(exclude); + if (!m) { throw new Error(`invalid regex string for exclude pattern: ${exclude}`); } + const source = m[1]; + let flags = new Set(m[2]); + flags.add('g'); + flags.delete('y'); + flags = [...flags.values()].join(''); + return new RegExp(source, flags); +} + +function parseExcludePatternSafely(exclude) { + try { + return parseExcludePattern(exclude) + } catch (ex) { + console.error(ex); + return null; + } +} + +async function loadDict(mode) { + const url = `dicts/${mode}.tlist`; + return await sts.StsDict.load(url); +} + +async function convertText(text, mode, exclude) { + const dict = await loadDict(mode); + const timeStart = performance.now(); + const result = await dict.convertText(text, parseExcludePatternSafely(exclude)); + console.log(`convert (bytes=${text.length}, mode=${mode}): ${performance.now() - timeStart} ms`); + return result; +} + +async function convertHtml(text, mode, exclude) { + const dict = await loadDict(mode); + const timeStart = performance.now(); + const html = dict.convertHtml(text, parseExcludePatternSafely(exclude)); + const wrapper = document.getElementById('viewer'); + wrapper.innerHTML = html; + console.log(`convertHtml (bytes=${text.length}, mode=${mode}): ${performance.now() - timeStart} ms`); + wrapper.hidden = false; + wrapper.scrollIntoView(); + + let a = wrapper.querySelector('a.unchecked'); + if (a) { a.focus(); return; } +} + +async function convertFile(file, mode, exclude, 
charset) { + const text = await readFileAsText(file, charset); + const result = await convertText(text, mode, exclude); + const fileNew = new File([result], file.name, {type: 'text/plain'}); + downloadFile(fileNew); +} + +async function readFileAsText(blob, charset = 'utf-8') { + const event = await new Promise((resolve, reject) => { + let reader = new FileReader(); + reader.onload = resolve; + reader.onerror = reject; + reader.readAsText(blob, charset); + }); + return event.target.result; +} + +function downloadFile(file) { + const a = document.createElement('a'); + a.download = file.name; + a.href = URL.createObjectURL(file); + document.body.appendChild(a); + a.click(); + a.remove(); +} + +async function showAdvancedOptions(formElem) { + if (typeof HTMLDialogElement === 'undefined') { + alert('瀏覽器不支援 對話方塊元素'); + return; + } + + const dialog = document.createElement('dialog'); + const form = dialog.appendChild(document.createElement('form')); + form.method = 'dialog'; + + const header = form.appendChild(document.createElement('header')); + header.textContent = '進階設定:'; + + const section = form.appendChild(document.createElement('section')); + { + const option = 'exclude-pattern'; + const subsection = section.appendChild(document.createElement('section')); + const label = subsection.appendChild(document.createElement('label')); + label.textContent = '忽略轉換的正規表示式:'; + const textarea = subsection.appendChild(document.createElement('textarea')); + textarea.name = option; + textarea.value = formElem[option].value; + textarea.placeholder = '/pattern/flags\n傳回匹配值或子群組 return, return1, …的值'; + } + // { + // const option = 'custom-dict'; + // const subsection = section.appendChild(document.createElement('section')); + // const label = subsection.appendChild(document.createElement('label')); + // label.textContent = '自訂轉換詞典: '; + // const textarea = subsection.appendChild(document.createElement('textarea')); + // textarea.name = option; + // textarea.value = 
formElem[option].value; + // } + { + const option = 'convert-file-charset'; + const subsection = section.appendChild(document.createElement('section')); + const label = subsection.appendChild(document.createElement('label')); + label.textContent = '轉換檔案輸入編碼:'; + const input = subsection.appendChild(document.createElement('input')); + input.name = option; + input.type = 'text'; + input.value = formElem[option].value; + input.placeholder = 'UTF-8'; + } + + const footer = form.appendChild(document.createElement('footer')); + const submitBtn = footer.appendChild(document.createElement('input')); + submitBtn.type = 'submit'; + submitBtn.value = '確認'; + const cancelBtn = footer.appendChild(document.createElement('input')); + cancelBtn.type = 'button'; + cancelBtn.value = '取消'; + cancelBtn.addEventListener('click', (event) => { + dialog.close(''); + }); + + const result = await new Promise((resolve, reject) => { + dialog.addEventListener('close', (event) => { + resolve(dialog.returnValue); + }); + document.body.appendChild(dialog); + dialog.showModal(); + }); + dialog.remove(); + + if (!result) { return; } + for (const elem of dialog.querySelectorAll('[name]')) { + formElem[elem.name].value = elem.value; + } +} + +document.addEventListener('DOMContentLoaded', function (event) { + const panel = document.getElementById('panel'); + const form = panel.querySelector('form'); + + form.addEventListener('submit', (event) => { + event.preventDefault(); + convertHtml(form.input.value, form.method.value, form['exclude-pattern'].value); + }); + + form['input'].addEventListener('dragover', (event) => { + event.preventDefault(); // required to allow drop + event.dataTransfer.dropEffect = 'copy'; + }); + + form['input'].addEventListener('drop', async (event) => { + async function handleEntry(entry) { + if (entry.isFile) { + let file = await new Promise((resolve, reject) => { + entry.file(resolve, reject); + }); + const {type, lastModified} = file; + file = new File([file], 
(function (global, factory) {
  if (typeof exports === "object" && typeof module === "object") {
    // CommonJS
    module.exports = factory();
  } else if (typeof define === "function" && define.amd) {
    // AMD
    define(factory);
  } else {
    // Browser globals
    global = typeof globalThis !== "undefined" ? globalThis : global || self;
    global.sts = factory();
  }
}(this, function () {

  'use strict';

  /**
   * Helpers for splitting text into "composites": a base character together
   * with the variation selectors / combining marks that follow it, or a
   * whole ideographic description sequence (IDS).
   */
  class Unicode {
    /**
     * Length, in UTF-16 code units, of the composite starting at `pos`.
     *
     * `length` counts how many more characters the composite still needs:
     * prefix composers (IDS operators, the ideographic variation indicator)
     * and postfix composers (variation selectors, combining marks) raise
     * it, every consumed character lowers it by one.
     *
     * @param {string} text
     * @param {number} pos - index of the first code unit of the composite
     * @returns {number} 0 when `pos` is at or beyond the end of `text`
     */
    static compositeLength(text, pos) {
      let i = pos;
      const total = text.length;
      let length = 1;
      let isIds = false;
      while (length && (i < total)) {
        let code = text.codePointAt(i);

        // check if the current char is a prefix composer
        if (code === 0x303E) {
          // ideographic variation indicator
          isIds = true;
          length += 1;
        } else if ((0x2FF0 <= code && code <= 0x2FF1) || (0x2FF4 <= code && code <= 0x2FFB)) {
          // IDS binary operator
          isIds = true;
          length += 2;
        } else if (0x2FF2 <= code && code <= 0x2FF3) {
          // IDS trinary operator
          isIds = true;
          length += 3;
        } else if (isIds && !(
          (0x4E00 <= code && code <= 0x9FFF)  // CJK unified
          || (0x3400 <= code && code <= 0x4DBF) || (0x20000 <= code && code <= 0x3FFFF)  // Ext-A, ExtB+
          || (0xF900 <= code && code <= 0xFAFF) || (0x2F800 <= code && code <= 0x2FA1F)  // Compatibility
          || (0x2E80 <= code && code <= 0x2FDF)  // Radical
          || (0x31C0 <= code && code <= 0x31EF)  // Stroke
          || (0xE000 <= code && code <= 0xF8FF) || (0xF0000 <= code && code <= 0x10FFFF)  // Private
          || (code == 0xFF1F)  // ?
          || (0xFE00 <= code && code <= 0xFE0F) || (0xE0100 <= code && code <= 0xE01EF)  // VS
        )) {
          // check for a valid IDS to avoid a breaking on e.g.:
          //
          //     IDS包括⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻,可用於…
          //
          // - IDS := Ideographic | Radical | CJK_Stroke | Private Use
          //        | U+FF1F | IDS_BinaryOperator IDS IDS
          //        | IDS_TrinaryOperator IDS IDS IDS
          //
          // - We also allow IVI and VS in an IDS.
          break;
        }

        // astral code points occupy two UTF-16 code units
        i += (code > 0xFFFF) ? 2 : 1;

        // check if the next char is a postfix composer
        if (i < total) {
          code = text.codePointAt(i);
          if (0xFE00 <= code && code <= 0xFE0F) {
            // variation selectors
            length += 1;
          } else if (0xE0100 <= code && code <= 0xE01EF) {
            // variation selectors supplement
            length += 1;
          } else if (0x180B <= code && code <= 0x180D) {
            // Mongolian free variation selectors
            length += 1;
          } else if (0x0300 <= code && code <= 0x036F) {
            // combining diacritical marks
            length += 1;
          } else if (0x1AB0 <= code && code <= 0x1AFF) {
            // combining diacritical marks extended
            length += 1;
          } else if (0x1DC0 <= code && code <= 0x1DFF) {
            // combining diacritical marks supplement
            length += 1;
          } else if (0x20D0 <= code && code <= 0x20FF) {
            // combining diacritical marks for symbols
            length += 1;
          } else if (0xFE20 <= code && code <= 0xFE2F) {
            // combining half marks
            length += 1;
          }
        }

        length--;
      }
      return i - pos;
    }

    /**
     * Split text into an array of composites.
     *
     * @param {string} text
     * @returns {string[]}
     */
    static split(text) {
      const rv = [];
      let i = 0;
      const total = text.length;
      while (i < total) {
        const len = Unicode.compositeLength(text, i);
        rv.push(text.slice(i, i + len));
        i += len;
      }
      return rv;
    }
  }

  const escapeHtmlRegex = /[&<"]/g;
  const escapeHtmlMap = {
    "&": "&amp;",
    "<": "&lt;",
    '"': "&quot;",
  };
  const escapeHtmlReplacer = m => escapeHtmlMap[m];

  /**
   * Escape `&`, `<` and `"` so that the text is safe as HTML content and
   * inside a double-quoted attribute value.
   *
   * @param {string} str
   * @returns {string}
   */
  function escapeHtml(str) {
    return str.replace(escapeHtmlRegex, escapeHtmlReplacer);
  }

  // Named capture groups "return", "return1", "return2", … of an exclude
  // pattern select what is emitted in place of the whole match.
  const excludeReturnGroupPattern = /^return\d*$/;

  /**
   * A conversion dictionary stored as a trie: every node maps a composite
   * to a child node, and the key '' of a node holds the array of
   * replacement candidates for the key spelled by the path to that node.
   */
  class StsDict {
    /**
     * @param {Object} dict - root node of the trie
     */
    constructor(dict) {
      this.dict = dict;
    }

    /**
     * Fetch a JSON-encoded trie and wrap it in an StsDict.
     *
     * @param {string} url
     * @returns {Promise<StsDict>}
     * @throws {Error} when the server answers with a non-2xx status
     */
    static async load(url) {
      const response = await fetch(url);
      if (!response.ok) {
        // report the HTTP failure instead of an obscure JSON parse error
        throw new Error(`failed to load dictionary "${url}": ${response.status} ${response.statusText}`);
      }
      const dict = await response.json();
      return new StsDict(dict);
    }

    /**
     * Turn a string into an array of composites; anything else is returned
     * as is (it is expected to be such an array already).
     *
     * @param {string|string[]} input
     * @returns {string[]}
     */
    normalize(input) {
      if (typeof input === 'string') {
        return Unicode.split(input);
      }
      return input;
    }

    /**
     * Find the longest dictionary key starting at `pos`.
     *
     * @param {string|string[]} input
     * @param {number} pos - index into the normalized input
     * @param {number} [maxpos=Infinity] - do not look at or beyond this index
     * @returns {?{conv: {key: string[], values: string[]}, start: number, end: number}}
     *     null when no key matches
     */
    match(input, pos, maxpos=Infinity) {
      input = this.normalize(input);
      let trie = this.dict;
      let i = pos;
      const total = Math.min(input.length, maxpos);
      let match = null;
      let matchEnd = null;
      while (i < total) {
        trie = trie[input[i]];
        if (!trie) {
          break;
        }
        const values = trie[''];
        if (values) {
          // remember the longest key seen so far and keep walking
          match = values;
          matchEnd = i + 1;
        }
        i++;
      }
      if (match) {
        return {
          conv: {
            key: input.slice(pos, matchEnd),
            values: match,
          },
          start: pos,
          end: matchEnd,
        };
      }
      return null;
    }

    /**
     * Convert the input greedily from left to right.
     *
     * @param {string|string[]} input
     * @yields {string|{key: string[], values: string[]}} an unconverted
     *     composite, or the conversion found for a matched key
     */
    *apply(input) {
      input = this.normalize(input);
      let i = 0;
      const total = input.length;
      while (i < total) {
        const match = this.match(input, i);
        if (match) {
          yield match.conv;
          i = match.end;
        } else {
          yield input[i];
          i++;
        }
      }
    }

    /**
     * Convert text, optionally leaving the parts matched by `exclude`
     * unconverted.
     *
     * @param {string} text
     * @param {?RegExp} [exclude]
     * @yields {string|{key: string[], values: string[]}} see apply()
     */
    *convert(text, exclude) {
      if (!exclude) {
        yield* this.apply(text);
        return;
      }

      yield* this.convertWithFilter(text, exclude);
    }

    /**
     * Convert the text between the matches of `exclude`. Every match is
     * emitted unconverted; when the pattern has named groups "return",
     * "return1", … the last one that participated in the match is emitted
     * instead of the whole match.
     *
     * @param {string} text
     * @param {RegExp} exclude - a pattern without the `g` flag is searched
     *     through a global copy
     * @yields {string|{key: string[], values: string[]}} see apply()
     */
    *convertWithFilter(text, exclude) {
      // exec() only advances lastIndex for global (or sticky) patterns; a
      // plain pattern would return its first match forever
      if (!exclude.global && !exclude.sticky) {
        exclude = new RegExp(exclude.source, exclude.flags + 'g');
      }

      let index = 0;
      let m;
      exclude.lastIndex = 0;
      while ((m = exclude.exec(text))) {
        const start = m.index;
        const end = exclude.lastIndex;

        let kept = m[0];
        for (const key in m.groups) {
          if (!excludeReturnGroupPattern.test(key)) { continue; }
          const value = m.groups[key];
          if (typeof value === 'undefined') { continue; }
          kept = value;
        }

        if (end === start) {
          // A zero-length match leaves lastIndex in place, so step over one
          // whole code point or exec() would find the same match again.
          exclude.lastIndex = end + ((text.codePointAt(end) > 0xFFFF) ? 2 : 1);

          // nothing is excluded here: do not split the surrounding text,
          // which would break dictionary keys spanning this position
          if (!kept) { continue; }
        }

        const before = text.slice(index, start);
        if (before) { yield* this.apply(before); }
        if (kept) { yield kept; }

        index = end;
      }

      const rest = text.slice(index);
      if (rest) { yield* this.apply(rest); }
    }

    /**
     * Convert to plain text using the first candidate of every conversion.
     *
     * @param {string} input
     * @param {?RegExp} [exclude]
     * @returns {string}
     */
    convertText(input, exclude) {
      const rv = [];
      for (const part of this.convert(input, exclude)) {
        if (typeof part === 'string') {
          rv.push(part);
          continue;
        }

        rv.push(part.values[0]);
      }
      return rv.join('');
    }

    /**
     * Convert to HTML for the interactive viewer. Unconverted text is HTML
     * escaped; a conversion whose key is a single composite with at most
     * one candidate is classed "single", any other "unchecked".
     *
     * @param {string} input
     * @param {?RegExp} [exclude]
     * @returns {string}
     */
    convertHtml(input, exclude) {
      const rv = [];
      for (const part of this.convert(input, exclude)) {
        if (typeof part === 'string') {
          rv.push(escapeHtml(part));
          continue;
        }

        const atomic = part.key.length === 1;
        const cls = (atomic && part.values.length <= 1) ? 'single' : 'unchecked';

        // NOTE(review): in the copy under review the template literals
        // below carry no element markup (two are empty, and `cls` and
        // `part.key` end up unused) — it looks like the HTML tags were
        // stripped from this copy. They are kept exactly as reviewed;
        // restore the markup from the upstream file before shipping.
        rv.push(``);
        rv.push(``);
        for (let i = 0, I = part.values.length; i < I; i++) {
          const value = part.values[i];
          rv.push(`${escapeHtml(value)}`);
        }
        rv.push(``);
      }
      return rv.join('');
    }
  }

  return {
    Unicode,
    StsDict,
  };

}));