Spaces:

luck210
/

gemma

Sleeping

File size: 1,779 Bytes

from fastapi import FastAPI, File, UploadFile
from transformers import pipeline
import PyPDF2
import docx
import os
import uvicorn
from io import BytesIO

# ASGI application object; the /translate/ route in this file is registered on it.
app = FastAPI()

# Load the translation model from Hugging Face (here: English -> French).
# This statement is at module level, so the pipeline is built once at import
# time and the same `translator` object is reused by every request.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")

def extract_text_from_pdf(file: BytesIO) -> str:
    """Extract the text content of a PDF document.

    Args:
        file: Binary stream positioned at the start of the PDF data.

    Returns:
        The text of every page that yields any text, joined with newlines.
        Pages for which extraction returns nothing (empty string or None)
        are skipped.
    """
    reader = PyPDF2.PdfReader(file)
    # Run the extraction exactly once per page: it is the expensive step, and
    # calling it in both the filter and the value would parse each page twice.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(page_text for page_text in page_texts if page_text)

def extract_text_from_docx(file: BytesIO) -> str:
    """Extract the text content of a DOCX document.

    Args:
        file: Binary stream holding the DOCX data.

    Returns:
        The text of each paragraph in ``doc.paragraphs``, in order, joined
        with newlines.
    """
    document = docx.Document(file)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)

def _split_into_chunks(text: str, max_chars: int = 1000) -> list:
    """Split *text* into pieces of at most *max_chars* characters.

    Blank lines are dropped, overlong lines are broken at word boundaries,
    and consecutive short lines are packed together (newline-separated) so a
    short document still ends up as a single chunk.

    Args:
        text: The text to split.
        max_chars: Upper bound on the length of each returned chunk.

    Returns:
        A list of non-empty strings; empty when *text* has no visible content.
    """
    import textwrap

    chunks = []
    current = ""
    for line in text.splitlines():
        # textwrap.wrap returns [] for blank lines and breaks long lines
        # (and, if needed, long words) so no piece exceeds max_chars.
        for piece in textwrap.wrap(line, max_chars):
            if current and len(current) + 1 + len(piece) > max_chars:
                chunks.append(current)
                current = piece
            elif current:
                current = f"{current}\n{piece}"
            else:
                current = piece
    if current:
        chunks.append(current)
    return chunks


@app.post("/translate/")
async def translate_file(file: UploadFile = File(...)):
    """Translate an uploaded PDF or DOCX file.

    The file type is chosen from the filename extension (case-insensitive).
    The extracted text is split into bounded chunks before translation,
    because the translation model only accepts a limited number of input
    tokens per sequence (512 for the opus-mt models, per their published
    configuration).

    Args:
        file: The uploaded document.

    Returns:
        On success, a dict with ``original_text`` and ``translated_text``,
        each limited to the first 500 characters for display.
        On failure, a dict with a single ``error`` message; this includes an
        unsupported extension, a document with no extractable text, and any
        exception raised while reading, extracting or translating.
    """
    try:
        contents = await file.read()
        file_io = BytesIO(contents)
        # splitext only reports a real suffix (a bare name like "pdf" has
        # none), and lower() lets "REPORT.PDF" through. filename may be None.
        file_extension = os.path.splitext(file.filename or "")[1].lower()

        if file_extension == ".pdf":
            text = extract_text_from_pdf(file_io)
        elif file_extension == ".docx":
            text = extract_text_from_docx(file_io)
        else:
            return {"error": "Format non supporté. Utilisez PDF ou DOCX."}

        chunks = _split_into_chunks(text)
        if not chunks:
            # E.g. a scanned PDF: nothing to send to the model.
            return {"error": "Aucun texte extractible dans le fichier."}

        # Translate the chunks as one batch. truncation=True is a backstop in
        # case a chunk still tokenizes to more than the model accepts.
        translation = translator(chunks, max_length=1000, truncation=True)
        translated_text = " ".join(t["translation_text"] for t in translation)

        # Only the first 500 characters are returned, for display purposes.
        return {"original_text": text[:500], "translated_text": translated_text[:500]}
    except Exception as e:
        # Endpoint boundary: report any failure in the same {"error": ...}
        # shape that clients already receive for unsupported formats.
        return {"error": str(e)}

if __name__ == "__main__":
    # Run the development server when executed as a script. The listening
    # port comes from the PORT environment variable, falling back to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    # Bind on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=listen_port)