From 7dfad35806f44d6840ca80f51330e755284b4dc0 Mon Sep 17 00:00:00 2001 From: rockbenben Date: Thu, 13 Jun 2024 15:49:09 +0800 Subject: [PATCH] feat: optimize subtitle translation speed and enable batch processing --- .../sublabel-translator/client.tsx | 171 +++++++++--------- src/app/components/translateText.tsx | 20 ++ 2 files changed, 101 insertions(+), 90 deletions(-) diff --git a/src/app/(translate)/sublabel-translator/client.tsx b/src/app/(translate)/sublabel-translator/client.tsx index 33a49e6..bfcd0cc 100644 --- a/src/app/(translate)/sublabel-translator/client.tsx +++ b/src/app/(translate)/sublabel-translator/client.tsx @@ -1,9 +1,10 @@ "use client"; + import React, { useState, useEffect } from "react"; import { Flex, Button, Input, Upload, Form, Space, message, Typography, Select, Modal, Progress, Radio, RadioChangeEvent } from "antd"; import { InboxOutlined } from "@ant-design/icons"; import { languages, translationMethods } from "@/app/components/transalteConstants"; -import { translateText } from "@/app/components/translateText"; +import { splitTextIntoChunks, translateText } from "@/app/components/translateText"; import { copyToClipboard } from "@/app/components/copyToClipboard"; const { Title, Paragraph } = Typography; @@ -81,7 +82,8 @@ const ClientPage = () => { setFile(file); const reader = new FileReader(); reader.onload = (e) => { - setSourceText(e.target?.result as string); + const text = (e.target?.result as string).replace(/\r\n/g, "\n"); + setSourceText(text); }; reader.readAsText(file); return false; @@ -146,15 +148,32 @@ const ClientPage = () => { } }; - const handleTranslate = async () => { + const filterContentLines = (lines: string[]) => { + const contentLines: string[] = []; + const contentIndices: number[] = []; + + lines.forEach((line, index) => { + const isTimecode = /^[\d:,]+ --> [\d:,]+$/.test(line); + const isIndex = /^\d+$/.test(line); + const isNumeric = /^\d+(\.\d+)?$/.test(line.trim()); + if (!isIndex && !isTimecode && !isNumeric && line.trim().length > 0) { + contentLines.push(line); + contentIndices.push(index); + } + }); + + return { contentLines, contentIndices }; + }; + + const validateInputs = async () => { if (sourceLanguage === targetLanguage) { message.error("源语言和目标语言不能相同"); - return; + return false; } if (translationMethod !== "deeplx" && !apiKeyDeepl && !apiKeyGoogleTranslate && !apiKeyAzure) { message.error("请设置 API Key"); - return; + return false; } if (translationMethod === "deeplx") { @@ -162,57 +181,64 @@ const ClientPage = () => { if (!isDeeplxWorking) { message.error("当前 Deeplx 节点有问题,请切换其他翻译模式"); setTranslationMethod("google"); // 默认切换到 Google 翻译 - return; + return false; } } - setTranslateInProgress(true); - setStartTime(Date.now()); + return true; + }; + const performTranslation = async (sourceText: string, fileName?: string) => { const lines = sourceText.split("\n"); - const translatedLines: string[] = []; + const { contentLines, contentIndices } = filterContentLines(lines); - for (const line of lines) { - const isTimecode = /^[\d:,]+ --> [\d:,]+$/.test(line); - const isIndex = /^\d+$/.test(line); - const isNumeric = /^\d+(\.\d+)?$/.test(line.trim()); - if (!isIndex && !isTimecode && !isNumeric && line.trim().length > 0) { - try { - const translatedLine = await translateTextUsingMethod(line); - translatedLines.push(translatedLine); - } catch (error) { - message.error("翻译过程中发生错误"); - setTranslateInProgress(false); - return; - } + try { + const chunks = splitTextIntoChunks(contentLines.join("\n"), 3000); + const translatedLines: string[] = []; + + for (const chunk of chunks) { + const translatedContent = await translateTextUsingMethod(chunk); + translatedLines.push(translatedContent); + } + + const finalTranslatedLines = translatedLines.join("\n").split("\n"); + const translatedTextArray = [...lines]; + contentIndices.forEach((index, i) => { + translatedTextArray[index] = finalTranslatedLines[i]; + }); + + const translatedText = translatedTextArray.join("\n"); + + if (fileName) { + const blob = new Blob([translatedText], { + type: "text/plain;charset=utf-8", + }); + const link = document.createElement("a"); + link.href = URL.createObjectURL(blob); + link.download = fileName; + link.click(); + URL.revokeObjectURL(link.href); } else { - translatedLines.push(line); + setTranslatedText(translatedText); } + } catch (error) { + message.error("翻译过程中发生错误"); + } finally { + setTranslateInProgress(false); } - - setTranslatedText(translatedLines.join("\n")); - setTranslateInProgress(false); }; - const handleMultipleTranslate = async () => { - if (sourceLanguage === targetLanguage) { - message.error("源语言和目标语言不能相同"); - return; - } + const handleTranslate = async () => { + if (!(await validateInputs())) return; - if (translationMethod !== "deeplx" && !apiKeyDeepl && !apiKeyGoogleTranslate && !apiKeyAzure) { - message.error("请设置 API Key"); - return; - } + setTranslateInProgress(true); + setStartTime(Date.now()); - if (translationMethod === "deeplx") { - const isDeeplxWorking = await testDeeplxTranslation(); - if (!isDeeplxWorking) { - message.error("当前 Deeplx 节点有问题,请切换其他翻译模式"); - setTranslationMethod("google"); // 默认切换到 Google 翻译 - return; - } - } + await performTranslation(sourceText); + }; + + const handleMultipleTranslate = async () => { + if (!(await validateInputs())) return; if (multipleFiles.length === 0) { message.error("请选择要翻译的字幕文件"); @@ -222,54 +248,19 @@ const ClientPage = () => { setTranslateInProgress(true); setStartTime(Date.now()); - try { - for (const currentFile of multipleFiles) { - const reader = new FileReader(); - await new Promise((resolve) => { - reader.onload = async (e) => { - const sourceText = e.target?.result as string; - const lines = sourceText.split("\n"); - const translatedLines: string[] = []; - - for (const line of lines) { - const isTimecode = /^[\d:,]+ --> [\d:,]+$/.test(line); - const isIndex = /^\d+$/.test(line); - const isNumeric = /^\d+(\.\d+)?$/.test(line.trim()); - if (!isIndex && !isTimecode && !isNumeric && line.trim().length > 0) { - try { - const translatedLine = await translateTextUsingMethod(line); - translatedLines.push(translatedLine); - } catch (error) { - message.error("翻译过程中发生错误"); - setTranslateInProgress(false); - return; - } - } else { - translatedLines.push(line); - } - } - - const translatedText = translatedLines.join("\n"); - const blob = new Blob([translatedText], { - type: "text/plain;charset=utf-8", - }); - const link = document.createElement("a"); - link.href = URL.createObjectURL(blob); - link.download = `${currentFile.name}`; - link.click(); - URL.revokeObjectURL(link.href); - - resolve(); - }; - reader.readAsText(currentFile); - }); - } - message.success("翻译完成,已自动下载所有翻译后的字幕文件"); - } catch (error) { - message.error("翻译过程中发生错误"); - } finally { - setTranslateInProgress(false); + for (const currentFile of multipleFiles) { + const reader = new FileReader(); + await new Promise((resolve) => { + reader.onload = async (e) => { + const text = (e.target?.result as string).replace(/\r\n/g, "\n"); + await performTranslation(text, currentFile.name); + resolve(); + }; + reader.readAsText(currentFile); + }); } + + message.success("翻译完成,已自动下载所有翻译后的字幕文件"); }; const handleExportSubtitle = () => { diff --git a/src/app/components/translateText.tsx b/src/app/components/translateText.tsx index 9a5b41b..402668f 100644 --- a/src/app/components/translateText.tsx +++ b/src/app/components/translateText.tsx @@ -7,6 +7,26 @@ interface TranslateTextParams { apiRegion?: string; } +export const splitTextIntoChunks = (text: string, maxLength: number) => { + const chunks = []; + let currentChunk = ""; + + text.split("\n").forEach((line) => { + if (currentChunk.length + line.length + 1 > maxLength) { + chunks.push(currentChunk); + currentChunk = line; + } else { + currentChunk += currentChunk ? "\n" + line : line; + } + }); + + if (currentChunk) { + chunks.push(currentChunk); + } + + return chunks; +}; + export const translateText = async ({ text, translationMethod, targetLanguage, sourceLanguage, apiKey, apiRegion = "eastasia" }: TranslateTextParams): Promise => { try { // 如果文本为空或源语言和目标语言相同,则直接返回原文