-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
40 lines (33 loc) · 7.16 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import time
from flask import Flask, render_template, request
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Supported languages as [display name, language code] pairs. The whole list is
# handed to the index.html template as `languages`.
# NOTE(review): the codes look like NLLB / FLORES-200 identifiers (ISO 639-3
# code + script) and are presumably the values the form posts back as
# src_lang / out_lang - confirm against the template.
languages = [ ['Acehnese (Arabic script)', 'ace_Arab'], ['Acehnese (Latin script)', 'ace_Latn'], ['Mesopotamian Arabic', 'acm_Arab'], ['Ta’izzi-Adeni Arabic', 'acq_Arab'], ['Tunisian Arabic', 'aeb_Arab'], ['Afrikaans', 'afr_Latn'], ['South Levantine Arabic', 'ajp_Arab'], ['Akan', 'aka_Latn'], ['Amharic', 'amh_Ethi'], ['North Levantine Arabic', 'apc_Arab'], ['Modern Standard Arabic', 'arb_Arab'], ['Modern Standard Arabic (Romanized)', 'arb_Latn'], ['Najdi Arabic', 'ars_Arab'], ['Moroccan Arabic', 'ary_Arab'], ['Egyptian Arabic', 'arz_Arab'], ['Assamese', 'asm_Beng'], ['Asturian', 'ast_Latn'], ['Awadhi', 'awa_Deva'], ['Central Aymara', 'ayr_Latn'], ['South Azerbaijani', 'azb_Arab'], ['North Azerbaijani', 'azj_Latn'], ['Bashkir', 'bak_Cyrl'], ['Bambara', 'bam_Latn'], ['Balinese', 'ban_Latn'], ['Belarusian', 'bel_Cyrl'], ['Bemba', 'bem_Latn'], ['Bengali', 'ben_Beng'], ['Bhojpuri', 'bho_Deva'], ['Banjar (Arabic script)', 'bjn_Arab'], ['Banjar (Latin script)', 'bjn_Latn'], ['Standard Tibetan', 'bod_Tibt'], ['Bosnian', 'bos_Latn'], ['Buginese', 'bug_Latn'], ['Bulgarian', 'bul_Cyrl'], ['Catalan', 'cat_Latn'], ['Cebuano', 'ceb_Latn'], ['Czech', 'ces_Latn'], ['Chokwe', 'cjk_Latn'], ['Central Kurdish', 'ckb_Arab'], ['Crimean Tatar', 'crh_Latn'], ['Welsh', 'cym_Latn'], ['Danish', 'dan_Latn'], ['German', 'deu_Latn'], ['Southwestern Dinka', 'dik_Latn'], ['Dyula', 'dyu_Latn'], ['Dzongkha', 'dzo_Tibt'], ['Greek', 'ell_Grek'], ['English', 'eng_Latn'], ['Esperanto', 'epo_Latn'], ['Estonian', 'est_Latn'], ['Basque', 'eus_Latn'], ['Ewe', 'ewe_Latn'], ['Faroese', 'fao_Latn'], ['Fijian', 'fij_Latn'], ['Finnish', 'fin_Latn'], ['Fon', 'fon_Latn'], ['French', 'fra_Latn'], ['Friulian', 'fur_Latn'], ['Nigerian Fulfulde', 'fuv_Latn'], ['Scottish Gaelic', 'gla_Latn'], ['Irish', 'gle_Latn'], ['Galician', 'glg_Latn'], ['Guarani', 'grn_Latn'], ['Gujarati', 'guj_Gujr'], ['Haitian Creole', 'hat_Latn'], ['Hausa', 'hau_Latn'], ['Hebrew', 'heb_Hebr'], ['Hindi', 'hin_Deva'], ['Chhattisgarhi', 
'hne_Deva'], ['Croatian', 'hrv_Latn'], ['Hungarian', 'hun_Latn'], ['Armenian', 'hye_Armn'], ['Igbo', 'ibo_Latn'], ['Ilocano', 'ilo_Latn'], ['Indonesian', 'ind_Latn'], ['Icelandic', 'isl_Latn'], ['Italian', 'ita_Latn'], ['Javanese', 'jav_Latn'], ['Japanese', 'jpn_Jpan'], ['Kabyle', 'kab_Latn'], ['Jingpho', 'kac_Latn'], ['Kamba', 'kam_Latn'], ['Kannada', 'kan_Knda'], ['Kashmiri (Arabic script)', 'kas_Arab'], ['Kashmiri (Devanagari script)', 'kas_Deva'], ['Georgian', 'kat_Geor'], ['Central Kanuri (Arabic script)', 'knc_Arab'], ['Central Kanuri (Latin script)', 'knc_Latn'], ['Kazakh', 'kaz_Cyrl'], ['Kabiyè', 'kbp_Latn'], ['Kabuverdianu', 'kea_Latn'], ['Khmer', 'khm_Khmr'], ['Kikuyu', 'kik_Latn'], ['Kinyarwanda', 'kin_Latn'], ['Kyrgyz', 'kir_Cyrl'], ['Kimbundu', 'kmb_Latn'], ['Northern Kurdish', 'kmr_Latn'], ['Kikongo', 'kon_Latn'], ['Korean', 'kor_Hang'], ['Lao', 'lao_Laoo'], ['Ligurian', 'lij_Latn'], ['Limburgish', 'lim_Latn'], ['Lingala', 'lin_Latn'], ['Lithuanian', 'lit_Latn'], ['Lombard', 'lmo_Latn'], ['Latgalian', 'ltg_Latn'], ['Luxembourgish', 'ltz_Latn'], ['Luba-Kasai', 'lua_Latn'], ['Ganda', 'lug_Latn'], ['Luo', 'luo_Latn'], ['Mizo', 'lus_Latn'], ['Standard Latvian', 'lvs_Latn'], ['Magahi', 'mag_Deva'], ['Maithili', 'mai_Deva'], ['Malayalam', 'mal_Mlym'], ['Marathi', 'mar_Deva'], ['Minangkabau (Arabic script)', 'min_Arab'], ['Minangkabau (Latin script)', 'min_Latn'], ['Macedonian', 'mkd_Cyrl'], ['Plateau Malagasy', 'plt_Latn'], ['Maltese', 'mlt_Latn'], ['Meitei (Bengali script)', 'mni_Beng'], ['Halh Mongolian', 'khk_Cyrl'], ['Mossi', 'mos_Latn'], ['Maori', 'mri_Latn'], ['Burmese', 'mya_Mymr'], ['Dutch', 'nld_Latn'], ['Norwegian Nynorsk', 'nno_Latn'], ['Norwegian Bokmål', 'nob_Latn'], ['Nepali', 'npi_Deva'], ['Northern Sotho', 'nso_Latn'], ['Nuer', 'nus_Latn'], ['Nyanja', 'nya_Latn'], ['Occitan', 'oci_Latn'], ['West Central Oromo', 'gaz_Latn'], ['Odia', 'ory_Orya'], ['Pangasinan', 'pag_Latn'], ['Eastern Panjabi', 'pan_Guru'], ['Papiamento', 'pap_Latn'], 
['Western Persian', 'pes_Arab'], ['Polish', 'pol_Latn'], ['Portuguese', 'por_Latn'], ['Dari', 'prs_Arab'], ['Southern Pashto', 'pbt_Arab'], ['Ayacucho Quechua', 'quy_Latn'], ['Romanian', 'ron_Latn'], ['Rundi', 'run_Latn'], ['Russian', 'rus_Cyrl'], ['Sango', 'sag_Latn'], ['Sanskrit', 'san_Deva'], ['Santali', 'sat_Olck'], ['Sicilian', 'scn_Latn'], ['Shan', 'shn_Mymr'], ['Sinhala', 'sin_Sinh'], ['Slovak', 'slk_Latn'], ['Slovenian', 'slv_Latn'], ['Samoan', 'smo_Latn'], ['Shona', 'sna_Latn'], ['Sindhi', 'snd_Arab'], ['Somali', 'som_Latn'], ['Southern Sotho', 'sot_Latn'], ['Spanish', 'spa_Latn'], ['Tosk Albanian', 'als_Latn'], ['Sardinian', 'srd_Latn'], ['Serbian', 'srp_Cyrl'], ['Swati', 'ssw_Latn'], ['Sundanese', 'sun_Latn'], ['Swedish', 'swe_Latn'], ['Swahili', 'swh_Latn'], ['Silesian', 'szl_Latn'], ['Tamil', 'tam_Taml'], ['Tatar', 'tat_Cyrl'], ['Telugu', 'tel_Telu'], ['Tajik', 'tgk_Cyrl'], ['Tagalog', 'tgl_Latn'], ['Thai', 'tha_Thai'], ['Tigrinya', 'tir_Ethi'], ['Tamasheq (Latin script)', 'taq_Latn'], ['Tamasheq (Tifinagh script)', 'taq_Tfng'], ['Tok Pisin', 'tpi_Latn'], ['Tswana', 'tsn_Latn'], ['Tsonga', 'tso_Latn'], ['Turkmen', 'tuk_Latn'], ['Tumbuka', 'tum_Latn'], ['Turkish', 'tur_Latn'], ['Twi', 'twi_Latn'], ['Central Atlas Tamazight', 'tzm_Tfng'], ['Uyghur', 'uig_Arab'], ['Ukrainian', 'ukr_Cyrl'], ['Umbundu', 'umb_Latn'], ['Urdu', 'urd_Arab'], ['Northern Uzbek', 'uzn_Latn'], ['Venetian', 'vec_Latn'], ['Vietnamese', 'vie_Latn'], ['Waray', 'war_Latn'], ['Wolof', 'wol_Latn'], ['Xhosa', 'xho_Latn'], ['Eastern Yiddish', 'ydd_Hebr'], ['Yoruba', 'yor_Latn'], ['Yue Chinese', 'yue_Hant'], ['Chinese (Simplified)', 'zho_Hans'], ['Chinese (Traditional)', 'zho_Hant'], ['Standard Malay', 'zsm_Latn'], ['Zulu', 'zul_Latn'] ]
class LanguageModel:
    """Hold a seq2seq translation model and its tokenizer, loaded once.

    The model and tokenizer are loaded in the constructor and reused for
    every call to :meth:`translate`.
    """

    # Name/path handed to ``from_pretrained`` when none is given.
    DEFAULT_MODEL = "nllb-200-distilled-1.3B"

    def __init__(self, model_name=DEFAULT_MODEL):
        """Load the model and tokenizer.

        Args:
            model_name: Identifier passed unchanged to
                ``AutoModelForSeq2SeqLM.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``. Defaults to
                ``"nllb-200-distilled-1.3B"``.
        """
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def translate(self, src, out, text, max_length=400):
        """Translate *text* from language *src* into language *out*.

        Args:
            src: Source language code (e.g. ``"eng_Latn"``).
            out: Target language code (e.g. ``"fra_Latn"``).
            text: Text to translate. ``None`` or a blank string is returned
                as ``""`` without invoking the model.
            max_length: ``max_length`` forwarded to the translation
                pipeline call. Defaults to 400.

        Returns:
            The ``translation_text`` of the first pipeline result, or ``""``
            when there is nothing to translate.
        """
        # Nothing to translate: skip the model instead of feeding it
        # None / whitespace.
        if text is None or (isinstance(text, str) and not text.strip()):
            return ""

        # NOTE(review): setting src_lang on the shared tokenizer mutates state
        # visible to every caller; kept because the original did so. It may be
        # redundant with the pipeline's src_lang argument - confirm.
        self.tokenizer.src_lang = src
        translator = pipeline(
            'translation',
            model=self.model,
            tokenizer=self.tokenizer,
            src_lang=src,
            tgt_lang=out,
        )
        output = translator(text, max_length=max_length)
        return output[0]['translation_text']
# Create the Flask application; the translation model is attached to it so
# the request handler can reach it as `app.language_model`.
app = Flask(__name__)

# The model is loaded once, when this module is imported. The progress message
# is flushed right away so it is visible while the load is in progress.
print("*** Loading Model...", end="", flush=True)
app.language_model = LanguageModel()
print("READY", flush=True)
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the translation form and handle translation requests.

    * Non-POST requests render ``index.html`` with an empty form and both
      language selectors set to ``eng_Latn``.
    * POST requests read ``src_lang``, ``out_lang`` and ``text`` from the
      submitted form, translate the text with ``app.language_model`` and
      render ``index.html`` with the result and the elapsed whole seconds.

    Returns:
        The rendered ``index.html`` template.
    """
    default_lang = 'eng_Latn'

    # Because GET is allowed, Flask also dispatches HEAD requests to this
    # view. Treat everything that is not a POST as a plain page load so the
    # view never falls through and returns None.
    if request.method != 'POST':
        return render_template(
            'index.html',
            src_lang=default_lang,
            out_lang=default_lang,
            text='',
            translated_text='',
            elapsed=0,
            languages=languages,
        )

    # Missing form fields come back as None; fall back to the same defaults
    # the empty form uses instead of passing None to the model.
    src_lang = request.form.get('src_lang') or default_lang
    out_lang = request.form.get('out_lang') or default_lang
    text = request.form.get('text') or ''

    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()
    if text.strip():
        translated_text = app.language_model.translate(src_lang, out_lang, text)
    else:
        # Nothing to translate; skip the model call.
        translated_text = ''
    elapsed = int(time.perf_counter() - start_time)

    return render_template(
        'index.html',
        src_lang=src_lang,
        out_lang=out_lang,
        text=text,
        elapsed=elapsed,
        translated_text=translated_text,
        languages=languages,
    )
# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    app.run()