Spaces:

Projetweb
/

AiServices

Runtime error

App Files Files Community

rayhane committed on Mar 17

Commit

8b3885d

1 Parent(s): 6de4c8a

Fix deployment

Browse files

Files changed (9) hide show

static/.dockerignore +6 -0
static/README.md +0 -10
static/README1.md +9 -0
static/app.py +7 -0
static/main.py +105 -0
static/requirements1.txt +0 -0
static/uploads/TD1 (1).docx +3 -0
static/uploads/Untitled.pdf +3 -0
static/uploads/chapitre 2 RECONNAISSANCE DE FORMES ET CLASSIFICATION.pdf +3 -0

static/.dockerignore ADDED Viewed

	@@ -0,0 +1,6 @@

+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.env
+venv/

static/README.md DELETED Viewed

@@ -1,10 +0,0 @@
-# Document Translation API
-Cette application permet de traduire des documents (TXT, PDF, DOCX) en plusieurs langues en utilisant FastAPI et les modèles de traduction de Hugging Face.
-## Comment l'utiliser ?
-1. **Uploader un document** (TXT, PDF, DOCX)
-2. **Choisir la langue source et la langue cible**
-3. **Obtenir la traduction instantanée !**
-Déployé sur Hugging Face Spaces avec Docker. 🚀

static/README1.md ADDED Viewed

	@@ -0,0 +1,9 @@

+---
+title: "Document Translation Service"
+emoji: "🌍"
+colorFrom: "blue"
+colorTo: "green"
+sdk: "docker"
+app_file: "app.py"
+pinned: false
+---

static/app.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from fastapi import FastAPI
+app = FastAPI()
+@app.get("/")
+def home():
+    return {"message": "Hello, Huging Face!"}

static/main.py ADDED Viewed

	@@ -0,0 +1,105 @@

+"""from fastapi import FastAPI, File, UploadFile, HTTPException, Form
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from transformers import pipeline
+import textwrap
+import fitz  # PyMuPDF for PDF
+from docx import Document
+import openpyxl  # For Excel
+from pptx import Presentation
+import os
+app = FastAPI()
+# Serve static files (like index.html)
+app.mount("/static", StaticFiles(directory="static"), name="static")
+@app.get("/", response_class=HTMLResponse)
+async def read_root():
+    with open("static/index.html", "r") as file:
+        html_content = file.read()
+    return HTMLResponse(content=html_content)
+# Language codes mapping: French display names (the values expected in the
+# src_lang / tgt_lang form fields) -> two-letter codes used to build the
+# Helsinki-NLP/opus-mt-{src}-{tgt} model names.
+LANGUAGE_CODES = {
+    "Anglais": "en",
+    "Français": "fr",
+    "Arabe": "ar",
+    "Espagnol": "es",
+    "Allemand": "de",
+    "Italien": "it",
+    "Portugais": "pt",
+    "Néerlandais": "nl"
+}
+# Function to load translation model for dynamic language pairs
+def load_translator(src_lang: str, tgt_lang: str):
+    src_code = LANGUAGE_CODES.get(src_lang)
+    tgt_code = LANGUAGE_CODES.get(tgt_lang)
+    if not src_code or not tgt_code:
+        raise ValueError(f"Unsupported language pair: {src_lang} -> {tgt_lang}")
+    try:
+        model_name = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
+        return pipeline("translation", model=model_name)
+    except Exception as e:
+        if src_code != "en" and tgt_code != "en":
+            model_src_to_en = pipeline("translation", model=f"Helsinki-NLP/opus-mt-{src_code}-en")
+            model_en_to_tgt = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{tgt_code}")
+            return (model_src_to_en, model_en_to_tgt)
+        else:
+            raise ValueError(f"No available translation model for {src_lang} -> {tgt_lang}")
+# Function to split text into manageable chunks
+def chunk_text(text, max_length=400):
+    return textwrap.wrap(text, max_length)
+# Extract text from different file types
+def extract_text(file: UploadFile):
+    if file.filename.endswith(".txt"):
+        return file.file.read().decode("utf-8")
+    elif file.filename.endswith(".pdf"):
+        doc = fitz.open(stream=file.file.read(), filetype="pdf")
+        return "\n".join([page.get_text() for page in doc])
+    elif file.filename.endswith(".docx"):
+        doc = Document(file.file)
+        return "\n".join([para.text for para in doc.paragraphs])
+    elif file.filename.endswith(".xlsx"):
+        wb = openpyxl.load_workbook(file.file)
+        sheets = wb.sheetnames
+        text = ""
+        for sheet in sheets:
+            ws = wb[sheet]
+            for row in ws.iter_rows():
+                text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
+        return text
+    elif file.filename.endswith(".pptx"):
+        prs = Presentation(file.file)
+        text = ""
+        for slide in prs.slides:
+            for shape in slide.shapes:
+                if hasattr(shape, "text"):
+                    text += shape.text + "\n"
+        return text
+    else:
+        raise HTTPException(status_code=400, detail="Unsupported file type.")
+# Upload and translate files
+@app.post("/upload/")
+async def upload_file(file: UploadFile = File(...), src_lang: str = Form(...), tgt_lang: str = Form(...)):
+    try:
+        text = extract_text(file)
+        translators = load_translator(src_lang, tgt_lang)
+        chunks = chunk_text(text)
+        if isinstance(translators, tuple):
+            translated_chunks = [translators[1](translators[0](chunk, max_length=400)[0]['translation_text'], max_length=400)[0]['translation_text'] for chunk in chunks]
+        else:
+            translated_chunks = [translators(chunk, max_length=400)[0]['translation_text'] for chunk in chunks]
+        translated_text = " ".join(translated_chunks)
+        return JSONResponse(content={"filename": file.filename, "translated_text": translated_text})
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
+"""

static/requirements1.txt ADDED Viewed

Binary file (2.27 kB). View file

static/uploads/TD1 (1).docx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a80769ffc2bd30394ef3124842a08c2a72dda9c161488c363799f53520895ec2
+size 53350

static/uploads/Untitled.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb03deee8b41e265cde7eb86f14deae4cc505d9fef18a3cdaccde123f7ca6ae8
+size 349803

static/uploads/chapitre 2 RECONNAISSANCE DE FORMES ET CLASSIFICATION.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cad0c7e09e566f603390d90611e4fd0c969d6920a064b4cf74fdbcde58fc8c7e
+size 1665388