# Source: Hugging Face Space (status: Running)
"""
translator.py
Wrapper around the Tencent Cloud and Baidu translation APIs, with
round-robin rotation across the configured Baidu credentials.

⚠️ The following environment variables must be set on the "Variables" page
of the Hugging Face Space:
------------------------------------------------------------------
TENCENT_SECRET_ID        Tencent Cloud SecretId
TENCENT_SECRET_KEY       Tencent Cloud SecretKey
TENCENT_TRANSLATE_URL    (optional) defaults to https://tmt.tencentcloudapi.com
BAIDU_TRANSLATE_URL      (optional) defaults to https://fanyi-api.baidu.com/api/trans/vip/translate
BAIDU_CREDENTIALS_JSON   a JSON array such as:
    [
      {"app_id": "xxxx", "secret_key": "yyyy"},
      {"app_id": "aaaa", "secret_key": "bbbb"}
    ]
------------------------------------------------------------------
"""
import hashlib
import hmac
import json
import os
import random
import time
from datetime import datetime, timezone
from typing import List, Optional, Sequence

import requests
# ------------------------------------------------------------------
# Read configuration from environment variables
# ------------------------------------------------------------------
def _load_baidu_credentials(raw: Optional[str]) -> List[dict]:
    """Parse the BAIDU_CREDENTIALS_JSON value into a list of credential dicts.

    A missing, blank or malformed value yields an empty list instead of
    raising, so a bad Baidu setting cannot break module import (and with it
    the Tencent path). Entries that are not objects carrying both
    ``app_id`` and ``secret_key`` are dropped.

    Args:
        raw: The raw environment variable value, or None when it is unset.

    Returns:
        The usable credential dicts, in their configured order.
    """
    if not raw or not raw.strip():
        return []
    try:
        parsed = json.loads(raw)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        print(f"[translator] BAIDU_CREDENTIALS_JSON is not valid JSON → {exc}")
        return []
    if not isinstance(parsed, list):
        print("[translator] BAIDU_CREDENTIALS_JSON must be a JSON array; ignoring it")
        return []
    usable = [
        entry for entry in parsed
        if isinstance(entry, dict) and entry.get("app_id") and entry.get("secret_key")
    ]
    if len(usable) != len(parsed):
        # Report only the count: the entries themselves may contain secrets.
        print(f"[translator] ignored {len(parsed) - len(usable)} malformed Baidu credential entries")
    return usable


TENCENT_SECRET_ID = os.environ.get("TENCENT_SECRET_ID")
TENCENT_SECRET_KEY = os.environ.get("TENCENT_SECRET_KEY")
TENCENT_TRANSLATE_URL = os.environ.get("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")
BAIDU_TRANSLATE_URL = os.environ.get("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
BAIDU_CREDENTIALS = _load_baidu_credentials(os.environ.get("BAIDU_CREDENTIALS_JSON"))
# Round-robin cursor: position in BAIDU_CREDENTIALS of the credential to hand out next.
_baidu_idx: int = 0


def _next_baidu_cred():
    """Return the next Baidu credential in rotation, or None if none are configured.

    Each call advances the module-level cursor ``_baidu_idx`` by one,
    wrapping around at the end of ``BAIDU_CREDENTIALS``.
    """
    global _baidu_idx
    pool = BAIDU_CREDENTIALS
    if not pool:
        return None
    position = _baidu_idx
    chosen = pool[position]
    _baidu_idx = (position + 1) % len(pool)
    return chosen
# ------------------------------------------------------------------
# Tencent Cloud translation
# ------------------------------------------------------------------
def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of *msg* (UTF-8 encoded) keyed with *key*."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str:
    """Derive the TC3 signing key and return the hex signature of *string_to_sign*.

    The signing key is built by chaining HMAC-SHA256 over the date, the
    service name and the literal "tc3_request", seeded with
    "TC3" + secret_key; the final HMAC over *string_to_sign* is hex-encoded.
    """
    derived_key = ("TC3" + secret_key).encode()
    for scope_part in (date, service, "tc3_request"):
        derived_key = _sign(derived_key, scope_part)
    return _sign(derived_key, string_to_sign).hex()
def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate *texts* through Tencent Cloud's ``TextTranslate`` action.

    The texts are joined with newlines into a single ``SourceText``, the
    request is signed with TC3-HMAC-SHA256, and the returned ``TargetText``
    is split on newlines again.

    NOTE: because of the join/split, a text that itself contains a newline
    produces more output items than there were inputs.

    Args:
        texts: Strings to translate.
        src: Source language code sent as ``Source``.
        tgt: Target language code sent as ``Target``.

    Returns:
        The translated lines, or None when the Tencent credentials are not
        configured or the request fails for any reason (the failure is
        printed, never raised).
    """
    if not (TENCENT_SECRET_ID and TENCENT_SECRET_KEY):
        return None  # credentials not configured

    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslate"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    content_type = "application/json; charset=utf-8"
    signed_headers = "content-type;host;x-tc-action"

    ts = int(time.time())
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime yields the same calendar date.
    date = datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")

    payload = {
        "SourceText": "\n".join(texts),
        "Source": src,
        "Target": tgt,
        "ProjectId": 0,
    }
    # Encode once and reuse the same bytes for hashing and sending: the
    # signature covers the exact body bytes, so the HTTP layer must not be
    # left to pick an encoding for a str body (non-ASCII text is expected
    # here because of ensure_ascii=False).
    payload_bytes = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    # ---------- step 1: canonical request ----------
    canonical_headers = (
        f"content-type:{content_type}\nhost:{host}\nx-tc-action:{action.lower()}\n"
    )
    canonical_request = "\n".join([
        "POST",
        "/",
        "",  # empty canonical query string
        canonical_headers,
        signed_headers,
        hashlib.sha256(payload_bytes).hexdigest(),
    ])

    # ---------- step 2: string to sign ----------
    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = "\n".join([
        algorithm,
        str(ts),
        credential_scope,
        hashlib.sha256(canonical_request.encode()).hexdigest(),
    ])

    # ---------- step 3: signature ----------
    signature = _tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)

    # ---------- step 4: headers ----------
    authorization = (
        f"{algorithm} Credential={TENCENT_SECRET_ID}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )
    headers = {
        "Authorization": authorization,
        "Content-Type": content_type,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(ts),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }

    # ---------- request ----------
    # Deliberately broad except: this is a best-effort backend and any
    # failure must let the caller fall through to the next one.
    try:
        resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_bytes, timeout=8)
        resp.raise_for_status()
        response = resp.json()["Response"]
        if "Error" in response:
            # API-level failures arrive with HTTP 200; report the service's
            # own code/message instead of a bare KeyError on "TargetText".
            err = response["Error"]
            print(f"[translator] Tencent API error → {err.get('Code')}: {err.get('Message')}")
            return None
        return response["TargetText"].split("\n")
    except Exception as e:
        print(f"[translator] Tencent API error → {e}")
        return None
# ------------------------------------------------------------------
# Baidu translation
# ------------------------------------------------------------------
def _translate_with_baidu(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate *texts* through the Baidu translation API.

    Uses the next credential in the round-robin rotation. The texts are
    joined with newlines into one query and the ``dst`` field of every
    ``trans_result`` entry is returned.

    NOTE: a text that itself contains a newline is presumably returned as
    several entries, so the output can be longer than the input — confirm
    against the callers' expectations.

    Args:
        texts: Strings to translate.
        src: Source language code sent as ``from``.
        tgt: Target language code sent as ``to``.

    Returns:
        The translated strings, or None when no credential is configured,
        the credential entry is malformed, or the request fails (the
        failure is printed, never raised).
    """
    creds = _next_baidu_cred()
    if creds is None:
        return None  # credentials not configured

    try:
        # str(): app ids are numeric and may have been written as JSON
        # numbers, which would break the string concatenation below.
        app_id, secret_key = str(creds["app_id"]), str(creds["secret_key"])
    except (KeyError, TypeError) as e:
        print(f"[translator] Baidu API error → malformed credential entry ({e!r})")
        return None

    salt = random.randint(32768, 65536)
    query = "\n".join(texts)
    # The signature is the MD5 of appid + q + salt + key over the raw
    # (not URL-encoded) query text.
    sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest()
    params = {
        "q": query, "from": src, "to": tgt,
        "appid": app_id, "salt": salt, "sign": sign,
    }
    # Deliberately broad except: this is a best-effort backend and any
    # failure must let the caller fall back to the original text.
    try:
        # Form-encoded POST rather than GET: the whole batch travels in "q"
        # and would otherwise have to fit inside the URL.
        resp = requests.post(BAIDU_TRANSLATE_URL, data=params, timeout=8)
        resp.raise_for_status()
        data = resp.json()
        if "trans_result" not in data:
            # API-level failures arrive with HTTP 200; report the service's
            # own code/message instead of a bare KeyError.
            print(f"[translator] Baidu API error → {data.get('error_code')}: {data.get('error_msg')}")
            return None
        return [item["dst"] for item in data["trans_result"]]
    except Exception as e:
        print(f"[translator] Baidu API error → {e}")
        return None
# ------------------------------------------------------------------
# Public entry point
# ------------------------------------------------------------------
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate *texts*, trying Tencent first and Baidu second.

    Returns an empty list for empty input. If both backends fail, the
    original texts are returned unchanged (as a new list).
    """
    if not texts:
        return []
    # Backends in priority order; stop at the first one that yields a result.
    for backend in (_translate_with_tencent, _translate_with_baidu):
        translated = backend(texts, src_lang, tgt_lang)
        if translated is not None:
            break
    return translated or list(texts)