|
- import requests
- from bs4 import BeautifulSoup
- import time
- import os
-
-
- import warnings
- warnings.filterwarnings('ignore')
-
# Endpoint of the translation web form; translate() POSTs the form fields here.
url = "https://mt.pcl.ac.cn/"

# Language codes accepted by translate() besides 'zh'.
# The ORDER IS SIGNIFICANT: translate() computes the numeric id sent to the
# server as ``languages.index(code) + 2`` (id 1 is used for 'zh'), so
# inserting, removing or reordering entries changes every following id.
languages = [
    'en', 'fr', 'ar', 'ru', 'es', 'de', 'cs', 'it', 'nl', 'pt',
    'id', 'bg', 'bs', 'fa', 'el', 'hr', 'hu', 'he', 'et', 'sl',
    'ur', 'tr', 'pl', 'ja', 'ro', 'vi', 'th', 'lt', 'mk', 'uk',
    'sk', 'sq', 'mn', 'bn', 'hi', 'be', 'si', 'az', 'ta', 'lv',
]
-
- #languages = ['fr', 'ru', 'es', 'cs','sq', 'mn', 'bn', 'hi', 'be', 'si', 'az', 'ta', 'lv']
-
-
-
def translate(text, source_language, target_language, timeout=60):
    """
    Translate ``text`` by submitting it to the web form at ``url`` and
    extracting the translation from the returned HTML page.

    Note that either source_language or target_language must be 'zh'.
    :param text: the content to be translated into target language.
    :param source_language: Shorthand for the source language, like 'en'
    :param target_language: Shorthand for the target language, like 'zh'
    :param timeout: seconds to wait for the server before the request is
        aborted; passed straight to ``requests.post``.
    :return: translation of 'text', with surrounding whitespace stripped
    :raises ValueError: if a language code other than 'zh' is not listed in
        ``languages``.
    :raises requests.RequestException: on connection failure, timeout, or an
        HTTP error status.
    :raises RuntimeError: if the result page does not contain the expected
        output element.
    """

    # map the language name into id: 'zh' is 1, every other code is its
    # position in `languages` shifted by 2.
    src_lang_id = 1 if source_language == 'zh' else languages.index(source_language) + 2
    tgt_lang_id = 1 if target_language == 'zh' else languages.index(target_language) + 2

    # send a post request, receive a HTML page which contains the result.
    data = {'source': src_lang_id, 'target': tgt_lang_id, 'raw_text': text}
    # NOTE(review): verify=False turns off TLS certificate verification
    # (carried over unchanged); confirm whether the endpoint's certificate can
    # be validated and drop the flag if so.
    res = requests.post(
        url,
        data=data,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        verify=False,
        timeout=timeout,  # without a timeout a stalled server blocks forever
    )
    # Fail on 4xx/5xx instead of trying to scrape an error page.
    res.raise_for_status()

    # parse the HTML page and get the translation text. The parser is named
    # explicitly so the result does not depend on which parsers are installed.
    soup = BeautifulSoup(res.content, 'html.parser')
    output = soup.find('div', id='outputtext')
    inner = output.find('div') if output is not None else None
    if inner is None:
        raise RuntimeError("translation output element not found in the response page")

    return inner.get_text().strip()
-
# Root directory for the per-language result files.
pt = r'result'

# For every language: translate each line of test-silu/<lang>.devtest into
# Chinese, write the translations to result/<lang>/<lang>2zh.test (one output
# line per input line, blank on failure) and the timing statistics to
# result/<lang>/<lang>2zh.txt.
for lang in languages:
    if lang in ('sq', 'si'):
        continue

    with open('test-silu/' + lang + '.devtest', 'r', encoding='utf-8') as f:
        lines = f.readlines()

    error_text = []   # 1-based numbers of the input lines that failed
    times = []        # response time of every successful request, in seconds
    total_time = 0
    print('translation start | ' + lang + '2zh')

    out_dir = pt + '/' + lang
    # The output directory may not exist yet on a fresh run.
    os.makedirs(out_dir, exist_ok=True)

    # Explicit UTF-8: the translations are Chinese text, which the platform
    # default encoding may be unable to represent.
    with open(out_dir + '/' + lang + '2zh' + '.test', 'w', encoding='utf-8') as f:
        for i, line in enumerate(lines, start=1):
            try:
                starttime = time.perf_counter()
                result = translate(line, lang, 'zh')
                response_time = time.perf_counter() - starttime
            except Exception as e:
                # Best effort: keep the output aligned with the input by
                # emitting an empty line, record the failure and carry on.
                f.write('\n')
                error_text.append(i)
                print('translation error | line ' + str(i) + ': ' + repr(e))
                continue

            times.append(response_time)
            total_time += response_time
            f.write(result + '\n')

    # Average over successful requests only (failed lines add no time to
    # total_time); an empty or fully failed run yields 0.0 instead of a
    # ZeroDivisionError.
    average_response_time = total_time / len(times) if times else 0.0

    # The 'everage_response_time' label is kept as-is so that existing result
    # files and anything parsing them stay compatible.
    with open(out_dir + '/' + lang + '2zh' + '.txt', 'w', encoding='utf-8') as f:
        for tm in times:
            f.write(str(tm) + '\n')

        f.write('total_time:' + str(total_time) + '\n')
        f.write('everage_response_time:' + str(average_response_time) + '\n')
        f.write('error text:')
        for j in error_text:
            f.write(str(j) + '\t')

    print("error text:")
    print(error_text)
    print('total_time')
    print(total_time)
    print('everage_response_time')
    print(average_response_time)
    print('translation completed')
    print('--------------------------------------------------------------------------')
|