最近在研究语音识别,主要是维语、藏语等小众语言的识别,调用 JTHS 的SDK后,识别结果是维文或者藏文,看不懂啊,必须要翻译成汉语才行,于是又调用 JTHS 官方提供的Web API,但是成功率太低,反馈给技术人员,暂未得到解决,很是失望。于是在网上找到了“中国民族语文翻译局”,这里刚好有这些特殊语种的翻译,于是研究一番,就着手写了Python和C#版本的翻译接口。效果还不错,于是写篇文章记录下。
话不多说,直接贴代码。
Python版
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Author : Anuo.
@License : (C) Copyright 2019, Anuo's Studio
@Contact : 188512936@qq.com
@Software: VS2017
@File: mzywfyj_spider.py
@Time: Feb 20,2019
@Desc: 中国民族语文翻译局,翻译爬虫,网址:http://www.mzywfy.org.cn/translate.jsp
'''
import urllib.request
import urllib.parse
import json
def mzywfyj_translate(trans_text, trans_from, trans_to, trans_url):
'''翻译'''
mzywfyj_url = "http://www.mzywfy.org.cn/ajaxservlet"
data = {}
data['src_text'] = trans_text
data['from'] = trans_from
data['to'] = trans_to
data['url'] = trans_url
data = urllib.parse.urlencode(data).encode('utf-8')
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"}
headers['Referer'] = 'http://www.mzywfy.org.cn/translate.jsp'; # 这个头必须加,不然不能成功
fanyi_re = urllib.request.Request(mzywfyj_url, data, headers)
fanyi_response = urllib.request.urlopen(fanyi_re)
ret = fanyi_response.read().decode('utf-8')
print(ret)
if __name__ == "__main__":
# 可以根据自己的需要设置参数,然后调用方法mzywfyj_translate()
# 汉文 -> 维吾尔文
trans_text = '你好'
trans_from = 'zh'
trans_to = 'uy'
trans_url = 2
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
# 维吾尔文 -> 汉文
trans_text = 'ياخشىمۇ سىز'
trans_from = 'uy'
trans_to = 'zh'
trans_url = 7
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
# 汉文 -> 藏文
trans_text = '你好'
trans_from = 'zh'
trans_to = 'ti'
trans_url = 1
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
# 藏文 -> 汉文
trans_text = 'སྐུ་ཁམས་བཟང་'
trans_from = 'ti'
trans_to = 'zh'
trans_url = 6
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
# 汉文 -> 蒙古文
trans_text = '你好'
trans_from = 'zh'
trans_to = 'mo'
trans_url = 0
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
# 蒙古文 -> 汉文
trans_text = '︽ ᠰᠠᠢᠢᠨ ᠪᠠᠶᠢᠨᠠ ᠤᠣ ︖ '
trans_from = 'mo'
trans_to = 'zh'
trans_url = 5
mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
C#版
/// <summary>
/// 翻译
/// </summary>
/// <param name="transText">需要翻译的内容</param>
/// <param name="transResult">翻译成功返回翻译结果,翻译失败返回错误信息</param>
/// <returns>翻译成功失败,true-成功;false-失败</returns>
public bool Translate(string transText, string from, string to, string url, out string transResult)
{
transResult = "";
string requestUrl = "http://www.mzywfy.org.cn/ajaxservlet";
try
{
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(requestUrl);
request.Timeout = 5000;
request.Method = "POST";
request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36";
request.Accept = "application/json, text/javascript, */*; q=0.01";
request.Referer = "http://www.mzywfy.org.cn/translate.jsp";
string send = string.Format("src_text={0}&from={1}&to={2}&url={3}", transText, from, to, url);
byte[] postData = Encoding.UTF8.GetBytes(send);
request.ContentLength = postData.Length;
Stream stream = request.GetRequestStream();
stream.Write(postData, 0, postData.Length);
stream.Close();
stream.Dispose();
bool success = false;
using (HttpWebResponse webResponse = (HttpWebResponse)request.GetResponse())
using (StreamReader responseStream = new StreamReader(webResponse.GetResponseStream()))
{
if (webResponse.StatusCode == HttpStatusCode.OK)
{
success = true;
string retJson = responseStream.ReadToEnd();
JObject jo = (JObject)JsonConvert.DeserializeObject(retJson); // 需要引用 Newtonsoft.Json.dll
transResult = jo["tgt_text"].ToString().Trim();
}
}
return success;
}
catch (Exception ex)
{
transResult = ex.ToString();
return false;
}
}
Just so so
Anuo.
Chengdu
Feb 20,2019
网友评论