amine_dubs
committed on
Commit
·
47cd112
1
Parent(s):
4e86ac5
nnn
Browse files- backend/main.py +143 -120
backend/main.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import HTMLResponse, JSONResponse
|
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from fastapi.templating import Jinja2Templates
|
5 |
from typing import List, Optional
|
|
|
6 |
import os
|
7 |
import requests
|
8 |
import json
|
@@ -13,6 +14,12 @@ import subprocess
|
|
13 |
import sys
|
14 |
import time
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Import transformers for local model inference
|
17 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
18 |
import torch
|
@@ -170,7 +177,11 @@ def translate_text(text, source_lang, target_lang):
|
|
170 |
|
171 |
try:
|
172 |
# Prepare input with explicit instruction format for better results with flan-t5
|
173 |
-
|
|
|
|
|
|
|
|
|
174 |
|
175 |
# Use a more reliable timeout approach with concurrent.futures
|
176 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
@@ -353,139 +364,151 @@ async def read_root(request: Request):
|
|
353 |
return templates.TemplateResponse("index.html", {"request": request})
|
354 |
|
355 |
@app.post("/translate/text")
|
356 |
-
async def
|
357 |
global translator, model, tokenizer
|
358 |
|
359 |
-
source_lang = request.source_lang
|
360 |
-
target_lang = request.target_lang
|
361 |
-
text = request.text
|
362 |
-
|
363 |
-
print(f"Translation Request - Source Lang: {source_lang}, Target Lang: {target_lang}")
|
364 |
-
|
365 |
-
translation_result = ""
|
366 |
-
error_message = None
|
367 |
-
|
368 |
try:
|
369 |
-
#
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
if not success:
|
374 |
-
raise Exception("Failed to initialize translation model")
|
375 |
|
376 |
-
|
377 |
-
lang_code_map = {
|
378 |
-
"en": "English", "es": "Spanish", "fr": "French", "de": "German",
|
379 |
-
"zh": "Chinese", "ja": "Japanese", "ko": "Korean", "ar": "Arabic",
|
380 |
-
"ru": "Russian", "pt": "Portuguese", "it": "Italian", "nl": "Dutch"
|
381 |
-
}
|
382 |
|
383 |
-
|
384 |
-
|
385 |
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
)
|
406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
|
|
|
408 |
try:
|
409 |
-
|
410 |
-
|
|
|
|
|
|
|
|
|
|
|
411 |
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
except Exception as e:
|
423 |
-
print(f"Error
|
424 |
-
|
|
|
|
|
425 |
|
426 |
except Exception as e:
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
try:
|
434 |
-
import googletrans
|
435 |
-
except ImportError:
|
436 |
-
print("Installing googletrans package...")
|
437 |
-
subprocess.call([sys.executable, "-m", "pip", "install", "googletrans==4.0.0-rc1"])
|
438 |
-
|
439 |
-
# Try LibreTranslate providers
|
440 |
-
libre_apis = [
|
441 |
-
"https://translate.terraprint.co/translate",
|
442 |
-
"https://libretranslate.de/translate",
|
443 |
-
"https://translate.argosopentech.com/translate",
|
444 |
-
"https://translate.fedilab.app/translate"
|
445 |
-
]
|
446 |
-
|
447 |
-
for api_url in libre_apis:
|
448 |
-
try:
|
449 |
-
print(f"Attempting fallback translation using LibreTranslate: {api_url}")
|
450 |
-
payload = {
|
451 |
-
"q": text,
|
452 |
-
"source": source_lang,
|
453 |
-
"target": target_lang,
|
454 |
-
"format": "text",
|
455 |
-
"api_key": ""
|
456 |
-
}
|
457 |
-
headers = {"Content-Type": "application/json"}
|
458 |
-
response = requests.post(api_url, json=payload, headers=headers, timeout=5)
|
459 |
-
|
460 |
-
if response.status_code == 200:
|
461 |
-
result = response.json()
|
462 |
-
if "translatedText" in result:
|
463 |
-
translation_result = result["translatedText"]
|
464 |
-
print(f"LibreTranslate successful: {translation_result}")
|
465 |
-
break
|
466 |
-
except Exception as libre_error:
|
467 |
-
print(f"Error with LibreTranslate {api_url}: {str(libre_error)}")
|
468 |
-
|
469 |
-
# If LibreTranslate failed, try Google Translate
|
470 |
-
if not translation_result:
|
471 |
-
try:
|
472 |
-
print("Attempting fallback with Google Translate (no API key)")
|
473 |
-
from googletrans import Translator
|
474 |
-
google_translator = Translator()
|
475 |
-
result = google_translator.translate(text, src=source_lang, dest=target_lang)
|
476 |
-
translation_result = result.text
|
477 |
-
print(f"Google Translate successful: {translation_result}")
|
478 |
-
except Exception as google_error:
|
479 |
-
print(f"Error with Google Translate fallback: {str(google_error)}")
|
480 |
-
|
481 |
-
except Exception as fallback_error:
|
482 |
-
print(f"All fallback translation methods failed: {str(fallback_error)}")
|
483 |
-
|
484 |
-
# If all translation attempts failed
|
485 |
-
if not translation_result:
|
486 |
-
return {"success": False, "error": error_message or "All translation methods failed"}
|
487 |
-
|
488 |
-
return {"success": True, "translation": translation_result}
|
489 |
|
490 |
@app.post("/translate/document")
|
491 |
async def translate_document_endpoint(
|
|
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from fastapi.templating import Jinja2Templates
|
5 |
from typing import List, Optional
|
6 |
+
from pydantic import BaseModel
|
7 |
import os
|
8 |
import requests
|
9 |
import json
|
|
|
14 |
import sys
|
15 |
import time
|
16 |
|
17 |
+
# Define the TranslationRequest model
|
18 |
+
class TranslationRequest(BaseModel):
    """Request body accepted by the ``/translate/text`` endpoint."""

    # Text to be translated.
    text: str
    # Language of ``text`` as supplied by the client.
    # NOTE(review): the endpoint looks the value up in a code-to-name map and
    # falls back to the raw value, so both "en" and "English" appear to be
    # accepted -- confirm against the frontend.
    source_lang: str
    # Language to translate into; same format as ``source_lang``.
    target_lang: str
|
22 |
+
|
23 |
# Import transformers for local model inference
|
24 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
25 |
import torch
|
|
|
177 |
|
178 |
try:
|
179 |
# Prepare input with explicit instruction format for better results with flan-t5
|
180 |
+
if target_lang == "Arabic" or target_lang == "ar":
|
181 |
+
# Special prompt for Arabic translations
|
182 |
+
input_text = f"You are a bilingual in {source_lang} and Arabic, a professional translator, translate this script from {source_lang} to Arabic MSA with cultural sensitivity and accuracy, with a focus on meaning and eloquence (Balagha), avoiding overly literal translations.: {text}"
|
183 |
+
else:
|
184 |
+
input_text = f"Translate from {source_lang} to {target_lang}: {text}"
|
185 |
|
186 |
# Use a more reliable timeout approach with concurrent.futures
|
187 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
|
364 |
return templates.TemplateResponse("index.html", {"request": request})
|
365 |
|
366 |
# Two-letter language codes mapped to the language names used in the prompt.
_LANG_CODE_NAMES = {
    "en": "English", "es": "Spanish", "fr": "French", "de": "German",
    "zh": "Chinese", "ja": "Japanese", "ko": "Korean", "ar": "Arabic",
    "ru": "Russian", "pt": "Portuguese", "it": "Italian", "nl": "Dutch",
}

# Public LibreTranslate instances tried, in order, when the local model fails.
_LIBRETRANSLATE_ENDPOINTS = (
    "https://translate.terraprint.co/translate",
    "https://libretranslate.de/translate",
    "https://translate.argosopentech.com/translate",
    "https://translate.fedilab.app/translate",
)

# Maximum time to wait for the local model before giving up on it.
_LOCAL_MODEL_TIMEOUT_SECONDS = 15


def _translate_with_local_model(text, source_lang, target_lang):
    """Translate ``text`` with the module-level ``translator`` pipeline.

    Returns the cleaned-up generated text. Raises ``Exception`` when the model
    cannot be initialised, times out, or returns an unexpected structure.
    """
    # ``translator`` is a module global; it is read at call time so that a
    # successful ``initialize_model()`` is picked up immediately.
    if translator is None:
        print("Translator not initialized. Attempting to initialize model...")
        if not initialize_model():
            raise Exception("Failed to initialize translation model")

    source_lang_name = _LANG_CODE_NAMES.get(source_lang.lower(), source_lang)
    target_lang_name = _LANG_CODE_NAMES.get(target_lang.lower(), target_lang)

    # Create a proper prompt for instruction-based models
    prompt = f"Translate from {source_lang_name} to {target_lang_name}: {text}"
    print(f"Using prompt: {prompt}")

    if not callable(translator):
        print("Translator is not callable, attempting to reinitialize")
        if not initialize_model() or not callable(translator):
            raise Exception("Translator is not callable after reinitialization")

    # The executor is deliberately NOT used as a context manager: leaving a
    # ``with`` block calls ``shutdown(wait=True)``, which would block until the
    # model call finishes and make the timeout below ineffective.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        # ``temperature`` is omitted on purpose: it only applies when sampling,
        # and this call uses greedy decoding (``do_sample=False``).
        future = executor.submit(
            translator, prompt, max_length=512, do_sample=False
        )
        try:
            result = future.result(timeout=_LOCAL_MODEL_TIMEOUT_SECONDS)
        except concurrent.futures.TimeoutError:
            print(f"Translation timed out after {_LOCAL_MODEL_TIMEOUT_SECONDS} seconds")
            raise Exception("Translation timed out") from None
    finally:
        # Do not wait for a still-running generation; the worker thread ends
        # on its own once the model call returns.
        executor.shutdown(wait=False)

    # Check result format before accessing elements
    if not result or not isinstance(result, list):
        raise Exception(f"Invalid model output format: {result}")

    translation = result[0]["generated_text"]

    # Clean up the output - remove any prefix like "Translation:"
    for prefix in ("Translation:", f"{target_lang_name}:"):
        if translation.startswith(prefix):
            translation = translation[len(prefix):].strip()

    print(f"Local model translation result: {translation}")
    return translation


def _translate_with_libretranslate(text, source_lang, target_lang):
    """Try each LibreTranslate endpoint in turn; return "" if none succeeds."""
    payload = {
        "q": text,
        "source": source_lang,
        "target": target_lang,
        "format": "text",
        "api_key": "",
    }
    headers = {"Content-Type": "application/json"}

    for api_url in _LIBRETRANSLATE_ENDPOINTS:
        try:
            print(f"Attempting fallback translation using LibreTranslate: {api_url}")
            response = requests.post(api_url, json=payload, headers=headers, timeout=5)
            if response.status_code != 200:
                continue
            result = response.json()
            if "translatedText" in result and result["translatedText"]:
                print(f"LibreTranslate successful: {result['translatedText']}")
                return result["translatedText"]
        except Exception as libre_error:
            # Best-effort: a failing mirror must not stop the remaining ones.
            print(f"Error with LibreTranslate {api_url}: {str(libre_error)}")
    return ""


def _translate_with_googletrans(text, source_lang, target_lang):
    """Translate via ``googletrans``; return "" on any failure."""
    try:
        try:
            from googletrans import Translator
        except ImportError:
            # NOTE(review): installing packages from inside a request handler
            # is slow and a supply-chain risk; prefer declaring googletrans in
            # the deployment requirements. Kept to preserve existing behaviour.
            import importlib

            print("googletrans package not installed, attempting to install...")
            subprocess.call([sys.executable, "-m", "pip", "install", "googletrans==4.0.0-rc1"])
            # Make the freshly installed package visible to the import system.
            importlib.invalidate_caches()
            from googletrans import Translator

        result = Translator().translate(text, src=source_lang, dest=target_lang)
        print(f"Google Translate successful: {result.text}")
        return result.text
    except Exception as google_error:
        print(f"Error with Google Translate fallback: {str(google_error)}")
        return ""


@app.post("/translate/text")
async def translate_text_endpoint(request: TranslationRequest):
    """Translate the text in ``request``.

    The local model is tried first; if it fails or produces no text, the
    LibreTranslate mirrors and then Google Translate are used as fallbacks.

    Returns:
        ``{"success": True, "translation": ...}`` on success,
        ``{"success": False, "error": ...}`` when every method failed, or a
        500 ``JSONResponse`` on an unexpected error.

    NOTE(review): the helpers below perform blocking work inside an
    ``async def`` handler, which stalls the event loop while they run;
    consider a plain ``def`` endpoint or offloading to a thread.
    """
    # Imported locally so the error handler below cannot itself fail with a
    # NameError if the module does not import ``traceback`` at the top.
    import traceback

    try:
        source_lang = request.source_lang
        target_lang = request.target_lang
        text = request.text

        print(f"Translation Request - Source Lang: {source_lang}, Target Lang: {target_lang}, Text: {text[:50]}...")

        # Nothing to translate: skip the model and the external services.
        if not text.strip():
            return {"success": True, "translation": ""}

        translation_result = ""
        error_message = None

        try:
            translation_result = _translate_with_local_model(text, source_lang, target_lang)
        except Exception as e:
            error_message = str(e)
            print(f"Error using local model: {error_message}")

        # Fall back whenever the local model produced nothing, whether it
        # raised or simply returned empty output.
        if not translation_result:
            translation_result = _translate_with_libretranslate(text, source_lang, target_lang)
        if not translation_result:
            translation_result = _translate_with_googletrans(text, source_lang, target_lang)

        # Report failure explicitly instead of "success" with an empty string.
        if not translation_result:
            return {"success": False, "error": error_message or "All translation methods failed"}

        return {"success": True, "translation": translation_result}

    except Exception as e:
        print(f"Critical error in translate_text_endpoint: {str(e)}")
        traceback.print_exc()
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": f"Translation failed: {str(e)}"}
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
|
513 |
@app.post("/translate/document")
|
514 |
async def translate_document_endpoint(
|