|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Request |
|
from fastapi.responses import HTMLResponse, JSONResponse |
|
from fastapi.staticfiles import StaticFiles |
|
from fastapi.templating import Jinja2Templates |
|
import os |
|
from typing import List, Optional |
|
import shutil |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Absolute path of the directory that contains this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Templates, static assets and uploads are resolved relative to the parent
# of this module's directory, not relative to BASE_DIR itself.
_PROJECT_ROOT = os.path.dirname(BASE_DIR)

TEMPLATE_DIR = os.path.join(_PROJECT_ROOT, "templates")
STATIC_DIR = os.path.join(_PROJECT_ROOT, "static")
UPLOAD_DIR = os.path.join(_PROJECT_ROOT, "uploads")
|
|
|
# The ASGI application object; routes below are registered on it.
app = FastAPI()

# StaticFiles checks that its directory exists when it is instantiated and
# raises RuntimeError otherwise. The directory creation in the ``__main__``
# block runs only after this module-level code, which is too late on a fresh
# checkout, so make sure the directory exists before mounting it.
os.makedirs(STATIC_DIR, exist_ok=True)
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")

# Template loader rooted at TEMPLATE_DIR; used by the "/" route.
templates = Jinja2Templates(directory=TEMPLATE_DIR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def translate_text_internal(text: str, source_lang: str, target_lang: str = "ar") -> str:
    """Return a placeholder Arabic translation of ``text``.

    No translation model is called in this function: it builds the prompt,
    prints it, and returns a fixed-format "simulated translation" string.

    Args:
        text: The text to translate.
        source_lang: Language of ``text``; interpolated into the prompt and
            into the returned string.
        target_lang: Accepted for interface compatibility but not used; the
            prompt always asks for Modern Standard Arabic.

    Returns:
        A bracketed placeholder string embedding ``text`` and ``source_lang``.
    """
    # The instruction and the payload are separated by one blank line.
    prompt = (
        f"Translate the following text from {source_lang} to Arabic "
        "(Modern Standard Arabic - Fusha) precisely. Do not provide a literal "
        "translation; focus on conveying the meaning accurately while "
        "respecting Arabic eloquence (balagha) by rephrasing if necessary:"
        "\n\n"
        f"{text}"
    )

    print(f"Using Prompt: {prompt}")

    return (
        f"[Simulated Translation of '{text}' from {source_lang} to MSA Arabic, "
        "focusing on meaning and eloquence]"
    )
|
|
|
|
|
|
|
def _discard_file(path: str) -> None:
    """Delete ``path`` if present; report, but never raise, on failure."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        # A failed cleanup must not mask the error that triggered it.
        print(f"Could not remove {path}: {exc}")


def extract_text_from_file(file_path: str, file_type: str) -> str:
    """Extract plain text from a document on disk, chosen by MIME type.

    Supported types are PDF, DOCX, XLSX, PPTX and any ``text/*`` type. The
    parser libraries are imported lazily so that a missing optional
    dependency only affects the corresponding format.

    Args:
        file_path: Path of the file to read. The file is deleted when
            extraction fails for any reason.
        file_type: MIME type reported for the file. ``None`` or any other
            unrecognised value is treated as an unsupported type.

    Returns:
        The extracted text (possibly empty).

    Raises:
        HTTPException: 400 for an unsupported type, 501 when the library
            needed for the type is not installed, 500 for any other failure.
    """
    try:
        if file_type == "application/pdf":
            import fitz

            with fitz.open(file_path) as doc:
                text = "".join(page.get_text() for page in doc)

        elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            from docx import Document

            text = "".join(para.text + "\n" for para in Document(file_path).paragraphs)

        elif file_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            import openpyxl

            workbook = openpyxl.load_workbook(file_path)
            lines = []
            for sheet_name in workbook.sheetnames:
                for row in workbook[sheet_name].iter_rows():
                    # Skip only truly empty cells; a plain truthiness test
                    # would also drop legitimate values such as 0 or False.
                    cells = [str(cell.value) + " " for cell in row if cell.value not in (None, "")]
                    lines.append("".join(cells) + "\n")
            text = "".join(lines)

        elif file_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            from pptx import Presentation

            chunks = []
            for slide in Presentation(file_path).slides:
                for shape in slide.shapes:
                    # Only shapes exposing a ``text`` attribute contribute.
                    if hasattr(shape, "text"):
                        chunks.append(shape.text + "\n")
            text = "".join(chunks)

        elif file_type and file_type.startswith("text/"):
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()

        else:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}. Cannot extract text.")

    except ImportError as ie:
        print(f"Import error for {file_type}: {ie}. Make sure the required library is installed.")
        _discard_file(file_path)
        raise HTTPException(status_code=501, detail=f"Text extraction for {file_type} requires an additional library: {ie.name}. Please install it (check requirements.txt). The file was not processed.") from ie
    except HTTPException:
        # Keep the intended status code (e.g. 400 for unsupported types)
        # instead of letting the generic handler below rewrite it to 500.
        _discard_file(file_path)
        raise
    except Exception as e:
        print(f"Error extracting text from {file_path} ({file_type}): {e}")
        _discard_file(file_path)
        raise HTTPException(status_code=500, detail=f"Failed to extract text from file: {e}") from e

    return text
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the main HTML page rendered from ``index.html``.

    Args:
        request: The incoming request, passed to the template context.

    Returns:
        The rendered ``index.html`` template response.

    Raises:
        HTTPException: 500 when the template directory or ``index.html``
            is missing, with the expected location in the detail message.
    """
    # Check for a real directory / regular file rather than mere existence,
    # so a stray file named like the directory is reported correctly.
    if not os.path.isdir(TEMPLATE_DIR):
        raise HTTPException(status_code=500, detail=f"Template directory not found at {TEMPLATE_DIR}")

    index_path = os.path.join(TEMPLATE_DIR, "index.html")
    if not os.path.isfile(index_path):
        raise HTTPException(status_code=500, detail=f"index.html not found in {TEMPLATE_DIR}")

    return templates.TemplateResponse("index.html", {"request": request})
|
|
|
@app.post("/translate/text")
async def translate_text_endpoint(
    text: str = Form(...),
    source_lang: str = Form(...),
    target_lang: str = Form("ar"),
):
    """Translate text submitted as form data.

    Args:
        text: The text to translate; must contain non-whitespace characters.
        source_lang: Language of ``text``; echoed back in the response.
        target_lang: Requested target language; only ``"ar"`` is accepted.

    Returns:
        A JSON response with ``translated_text`` and ``source_lang``.

    Raises:
        HTTPException: 400 for empty text or an unsupported target language,
            500 for any unexpected failure during translation.
    """
    # Whitespace-only input carries nothing to translate either.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="No text provided for translation.")

    if target_lang != "ar":
        raise HTTPException(status_code=400, detail="Currently, only translation to Arabic (ar) is supported via this endpoint.")

    try:
        # The source language is used exactly as supplied by the client.
        actual_source_lang = source_lang
        translated_text = translate_text_internal(text, actual_source_lang, target_lang)
        return JSONResponse(content={"translated_text": translated_text, "source_lang": actual_source_lang})
    except HTTPException:
        # Deliberate HTTP errors pass through with their own status code.
        raise
    except Exception as e:
        print(f"Unexpected error in /translate/text: {e}")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during text translation: {e}") from e
|
|
|
|
|
@app.post("/translate/document")
async def translate_document_endpoint(
    file: UploadFile = File(...),
    source_lang: str = Form(...),
    target_lang: str = Form("ar"),
):
    """Translate the text extracted from an uploaded document.

    The upload is copied to a uniquely named temporary file inside
    ``UPLOAD_DIR``, its text is extracted according to the upload's content
    type, and the text is translated. The temporary file is always removed
    before the function returns or raises.

    Args:
        file: The uploaded document.
        source_lang: Language of the document; echoed back in the response.
        target_lang: Requested target language; only ``"ar"`` is accepted.

    Returns:
        A JSON response with ``original_filename``, ``detected_source_lang``
        and ``translated_text``.

    Raises:
        HTTPException: 400 for an unsupported target language or when no text
            could be extracted, 500 when the upload directory cannot be
            created or on any unexpected failure; errors raised by the
            extraction step propagate with their own status code.
    """
    import tempfile

    if target_lang != "ar":
        raise HTTPException(status_code=400, detail="Currently, only document translation to Arabic (ar) is supported.")

    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Could not create upload directory: {e}") from e

    # Never build the on-disk name from the client-supplied filename: it is
    # untrusted input and two concurrent uploads with the same name would
    # overwrite each other. Only the extension is kept; splitext takes it
    # from the final path component, so it cannot contain a path separator.
    extension = os.path.splitext(file.filename or "")[1]
    temp_file_path = None

    try:
        fd, temp_file_path = tempfile.mkstemp(prefix="temp_", suffix=extension, dir=UPLOAD_DIR)
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        extracted_text = extract_text_from_file(temp_file_path, file.content_type)

        # Whitespace-only output means there is nothing to translate.
        if not extracted_text or not extracted_text.strip():
            raise HTTPException(status_code=400, detail="Could not extract any text from the document.")

        # The source language is used exactly as supplied by the client.
        actual_source_lang = source_lang
        translated_text = translate_text_internal(extracted_text, actual_source_lang, target_lang)

        return JSONResponse(content={
            "original_filename": file.filename,
            "detected_source_lang": actual_source_lang,
            "translated_text": translated_text,
        })
    except HTTPException:
        # Deliberate HTTP errors pass through with their own status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred processing the document: {e}") from e
    finally:
        # Single cleanup point for every exit path. The extraction step may
        # already have deleted the file, so a failed removal is not an error.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Development entry point: prepare the expected directories and a
    # placeholder page, then start the server.
    import uvicorn

    print(f"Template Directory: {TEMPLATE_DIR}")
    print(f"Static Directory: {STATIC_DIR}")
    print(f"Upload Directory: {UPLOAD_DIR}")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    for directory in (TEMPLATE_DIR, STATIC_DIR, UPLOAD_DIR):
        os.makedirs(directory, exist_ok=True)

    # Provide a minimal page so the "/" route works before a real frontend
    # is added; an existing index.html is left untouched.
    index_path = os.path.join(TEMPLATE_DIR, "index.html")
    if not os.path.exists(index_path):
        with open(index_path, "w", encoding="utf-8") as f:
            f.write("<html><body><h1>Placeholder Frontend</h1></body></html>")

    # The "main:app" import string assumes this module is importable as
    # ``main`` from the current working directory.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
|
|