Spaces:
Running
Running
File size: 6,269 Bytes
96c8569 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"""
translator.py
Round-robin wrapper around the Tencent Cloud and Baidu translation APIs.
⚠️ The following environment variables must be set on the HF Space's "Variables" page
------------------------------------------------------------------
TENCENT_SECRET_ID Tencent Cloud SecretId
TENCENT_SECRET_KEY Tencent Cloud SecretKey
TENCENT_TRANSLATE_URL (optional) defaults to https://tmt.tencentcloudapi.com
BAIDU_TRANSLATE_URL (optional) defaults to https://fanyi-api.baidu.com/api/trans/vip/translate
BAIDU_CREDENTIALS_JSON a JSON array of the form:
[
{"app_id": "xxxx", "secret_key": "yyyy"},
{"app_id": "aaaa", "secret_key": "bbbb"}
]
------------------------------------------------------------------
"""
import hashlib, hmac, json, os, random, time
from datetime import datetime
from typing import List, Sequence, Optional
import requests
# ------------------------------------------------------------------
# Read configuration from environment variables
# ------------------------------------------------------------------
# Tencent Cloud credentials; each is None when the variable is not set.
TENCENT_SECRET_ID = os.getenv("TENCENT_SECRET_ID")
TENCENT_SECRET_KEY = os.getenv("TENCENT_SECRET_KEY")

# Service endpoints; the second argument is the value used when the variable is unset.
TENCENT_TRANSLATE_URL = os.getenv(
    "TENCENT_TRANSLATE_URL",
    "https://tmt.tencentcloudapi.com",
)
BAIDU_TRANSLATE_URL = os.getenv(
    "BAIDU_TRANSLATE_URL",
    "https://fanyi-api.baidu.com/api/trans/vip/translate",
)

# Baidu credentials are decoded from JSON at import time ("[]" when unset).
BAIDU_CREDENTIALS = json.loads(os.getenv("BAIDU_CREDENTIALS_JSON", "[]"))
# Internal round-robin index
_baidu_idx: int = 0


def _next_baidu_cred():
    """Return the next entry of BAIDU_CREDENTIALS in round-robin order.

    Returns None when BAIDU_CREDENTIALS is empty. Otherwise returns the entry
    at the current index and advances the module-level index, wrapping back to
    0 after the last entry.
    """
    global _baidu_idx
    if BAIDU_CREDENTIALS:
        chosen = BAIDU_CREDENTIALS[_baidu_idx]
        # Advance for the next call; the modulo wraps around at the end.
        _baidu_idx = (_baidu_idx + 1) % len(BAIDU_CREDENTIALS)
        return chosen
    return None
# ------------------------------------------------------------------
# Tencent translation
# ------------------------------------------------------------------
def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) under ``key``."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str:
    """Derive the signing key and return the hex HMAC-SHA256 of ``string_to_sign``.

    The key starts as the bytes of ``"TC3" + secret_key`` and is re-keyed via
    ``_sign`` with, in order, ``date``, ``service`` and the literal
    ``"tc3_request"``. The final key signs ``string_to_sign``.
    """
    signing_key = ("TC3" + secret_key).encode()
    for scope_part in (date, service, "tc3_request"):
        signing_key = _sign(signing_key, scope_part)
    mac = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256)
    return mac.hexdigest()
def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` through the Tencent Cloud ``TextTranslate`` action.

    All texts are joined with newlines into a single ``SourceText``; the
    returned ``TargetText`` is split on newlines again. The request is signed
    with the TC3-HMAC-SHA256 scheme.

    Args:
        texts: Source strings to translate.
        src: Value sent as ``Source`` (source language code).
        tgt: Value sent as ``Target`` (target language code).

    Returns:
        The translated lines, or None when credentials are not configured,
        the HTTP request fails, or the API reports an error.

    NOTE(review): an input string that itself contains a newline yields more
    output items than inputs, because the batch is split on "\n" — confirm
    whether callers rely on a one-to-one alignment.
    """
    if not (TENCENT_SECRET_ID and TENCENT_SECRET_KEY):
        return None  # credentials not configured

    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslate"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    signed_headers = "content-type;host;x-tc-action"

    ts = int(time.time())
    # UTC date of the timestamp. time.gmtime replaces datetime.utcfromtimestamp,
    # which is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(ts))

    payload = {
        "SourceText": "\n".join(texts),
        "Source": src,
        "Target": tgt,
        "ProjectId": 0,
    }
    # Encode once: the exact bytes that are hashed for the signature must be
    # the bytes that go on the wire. Sending a str would leave the encoding of
    # non-ASCII text to the HTTP stack.
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    # ---------- step 1: canonical request ----------
    canonical_headers = (
        f"content-type:application/json; charset=utf-8\n"
        f"host:{host}\n"
        f"x-tc-action:{action.lower()}\n"
    )
    canonical_request = "\n".join([
        "POST",
        "/",
        "",
        canonical_headers,
        signed_headers,
        hashlib.sha256(body).hexdigest(),
    ])

    # ---------- step 2: string to sign ----------
    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = "\n".join([
        algorithm,
        str(ts),
        credential_scope,
        hashlib.sha256(canonical_request.encode()).hexdigest(),
    ])

    # ---------- step 3: signature ----------
    signature = _tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)

    # ---------- step 4: headers ----------
    authorization = (
        f"{algorithm} Credential={TENCENT_SECRET_ID}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )
    headers = {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(ts),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }

    # ---------- request ----------
    try:
        resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=body, timeout=8)
        resp.raise_for_status()
        result = resp.json()["Response"]
        # API-level failures arrive in an "Error" object rather than as an HTTP
        # error status; surface its code and message instead of a bare KeyError.
        api_error = result.get("Error")
        if api_error:
            raise RuntimeError(f"{api_error.get('Code')}: {api_error.get('Message')}")
        return result["TargetText"].split("\n")
    except Exception as e:
        # Best-effort backend: log and let the caller fall back.
        print(f"[translator] Tencent API error → {e}")
        return None
# ------------------------------------------------------------------
# Baidu translation
# ------------------------------------------------------------------
def _translate_with_baidu(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` through the Baidu translation API.

    Uses the next credential pair from ``_next_baidu_cred``. All texts are
    joined with newlines into a single query; the ``dst`` field of every item
    in the response's ``trans_result`` list is returned.

    Args:
        texts: Source strings to translate.
        src: Value sent as ``from`` (source language code).
        tgt: Value sent as ``to`` (target language code).

    Returns:
        The translated strings, or None when no credentials are configured,
        the credential entry is malformed, the HTTP request fails, or the API
        reports an error.
    """
    creds = _next_baidu_cred()
    if creds is None:
        return None  # credentials not configured

    query = "\n".join(texts)
    try:
        # str() guards against credentials whose app_id was written as a JSON
        # number; a missing key is caught below instead of crashing the caller.
        app_id = str(creds["app_id"])
        secret_key = str(creds["secret_key"])
        salt = str(random.randint(32768, 65536))
        # Sign = MD5(appid + q + salt + secret_key) over the raw, un-escaped query.
        sign = hashlib.md5((app_id + query + salt + secret_key).encode("utf-8")).hexdigest()
        form = {
            "q": query,
            "from": src,
            "to": tgt,
            "appid": app_id,
            "salt": salt,
            "sign": sign,
        }
        # Form-encoded POST keeps the joined batch out of the URL, so long
        # inputs are not subject to URL-length limits.
        resp = requests.post(BAIDU_TRANSLATE_URL, data=form, timeout=8)
        resp.raise_for_status()
        data = resp.json()
        if "trans_result" not in data:
            # Report the API's own error fields rather than a bare KeyError.
            raise RuntimeError(f"{data.get('error_code')}: {data.get('error_msg')}")
        return [item["dst"] for item in data["trans_result"]]
    except Exception as e:
        # Best-effort backend: log and let the caller fall back.
        print(f"[translator] Baidu API error → {e}")
        return None
# ------------------------------------------------------------------
# Public unified entry point
# ------------------------------------------------------------------
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate ``texts``, trying Tencent first and Baidu second.

    A backend that returns None is treated as failed and the next one is
    tried. If no backend produces a non-empty result, a list copy of the
    original texts is returned. Empty input returns an empty list.
    """
    if texts:
        translated = None
        for backend in (_translate_with_tencent, _translate_with_baidu):
            translated = backend(texts, src_lang, tgt_lang)
            if translated is not None:
                break
        if translated:
            return translated
        return list(texts)
    return []
|