Smart_Resume / app.py
ZELEFACK's picture
Update app.py
4ae13dc
raw
history blame
5.51 kB
#!/usr/bin/env python
# coding: utf-8
import os

import googletrans
import gradio as gr
import numpy as np
import pytesseract
import requests
from googletrans import Translator
from gtts import gTTS
from PIL import Image
from PyPDF2 import PdfFileReader
# from docx import Document
# Running count of generated audio files; it is embedded in each output file
# name so successive requests do not overwrite one another.
cnt = 0
# Language table from googletrans; get_key() searches its values to find the
# matching key.
langues = googletrans.LANGUAGES
# Hosted inference endpoint of the summarization model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
# SECURITY: credentials do not belong in source control. Set HF_API_TOKEN in
# the environment to supply the token; the embedded value is kept only as a
# backward-compatible fallback and should be revoked and removed.
headers = {
    "Authorization": "Bearer "
    + os.environ.get("HF_API_TOKEN", "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")
}
def query(payload, timeout=60):
    """Post *payload* to the summarization endpoint and return the decoded reply.

    Args:
        payload: JSON-serializable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled endpoint would block the request indefinitely.

    Returns:
        The response body decoded from JSON.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
def get_key(val):
    """Return the first key of ``langues`` whose value equals *val*.

    Returns ``None`` when no entry matches.
    """
    matches = (code for code, label in langues.items() if label == val)
    return next(matches, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    Args:
        file_name: Object exposing the file's path on disk as ``.name``.

    Returns:
        The extracted text as one string. ``.txt`` files are returned as-is,
        ``.pdf`` pages are concatenated with their newlines removed, and
        ``.jpg``/``.jpeg``/``.png`` images are passed through Tesseract OCR.
        ``.docx`` files and unrecognized extensions yield an empty string.
    """
    name = file_name.name.replace("\\", '/')
    # Compare extensions case-insensitively so "REPORT.PDF" is handled too.
    lowered = name.lower()
    article = ""
    if lowered.endswith(".txt"):
        with open(name, "r") as handle:
            article = handle.read()
    elif lowered.endswith(".pdf"):
        # The reader is handed the open handle, so keep the file open until
        # every page has been extracted, then close it.
        with open(name, 'rb') as handle:
            document = PdfFileReader(handle)
            article = "".join(
                document.getPage(page).extractText().replace('\n', '')
                for page in range(document.numPages)
            )
    elif lowered.endswith(".docx"):
        # Word documents are not supported yet; fall through with empty text.
        pass
    elif lowered.endswith((".jpg", ".png", ".jpeg")):
        # Only point pytesseract at the Windows install when it really exists;
        # elsewhere leave pytesseract's own default command in place.
        windows_tesseract = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'
        if os.path.exists(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(name) as img:
            # The recognized text is the article (it used to be discarded).
            article = pytesseract.image_to_string(img)
    return article
def translate_data(text, final_language):
    """Translate *text* into *final_language* and return the translated string.

    *final_language* is a language name; it is converted with ``get_key``
    before being passed as the translation destination.
    """
    target_code = get_key(final_language)
    result = Translator().translate(text, dest=target_code)
    return result.text
def _summarize_text(text):
    """Return the summary the hosted model produces for *text*."""
    response = query({"inputs": text})
    # A reply that is not a non-empty list of {"summary_text": ...} items
    # (for example an error payload) is reported explicitly instead of
    # surfacing as an opaque KeyError/TypeError.
    try:
        return response[0]['summary_text']
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Summarization request failed: {response!r}"
        ) from err


def _speak_to_file(text, lang, prefix, index):
    """Synthesize *text* with gTTS, save it and return the file name."""
    # NOTE(review): the files keep the original ".wav" suffix; confirm the
    # audio format gTTS actually writes and rename if it differs.
    audio_path = f"{prefix}{index}.wav"
    gTTS(text=text, lang=lang, slow=False).save(audio_path)
    return audio_path


def generate_summary(file_name, mode, final_language):
    """Process an uploaded document and return its audio rendering and text.

    Args:
        file_name: Uploaded file object, forwarded to ``read_article``.
        mode: One of ``"traduction"`` (translate), ``"lecture"`` (read the
            text unchanged), ``"resume_et_traduire"`` (summarize, then
            translate); any other value summarizes without translating.
        final_language: Language name used as the translation target and as
            the speech language in every mode except plain summary, where
            the language detected for the summary is used.

    Returns:
        A ``(audio_file_name, text)`` tuple where *text* is what was spoken.

    Raises:
        ValueError: If no text could be extracted from the file.
        RuntimeError: If the summarization service returns no summary.
    """
    global cnt
    sentences = read_article(file_name)
    if not sentences.strip():
        # Nothing to translate, summarize or read aloud.
        raise ValueError("No text could be extracted from the uploaded file")
    cnt += 1
    run_id = cnt

    if mode == "traduction":
        spoken = translate_data(sentences, final_language)
        lang = get_key(final_language)
        prefix = "audio_traduce"
    elif mode == "lecture":
        # The text is read as-is; no translation request is needed here.
        spoken = sentences
        lang = get_key(final_language)
        prefix = "audio_lecture"
    elif mode == "resume_et_traduire":
        spoken = translate_data(_summarize_text(sentences), final_language)
        lang = get_key(final_language)
        prefix = "audio_resume_traduire"
    else:
        spoken = _summarize_text(sentences)
        # The translation call is used only for its detected source language,
        # so the summary is spoken in the language it is written in.
        lang = Translator().translate(spoken).src
        prefix = "audio_resume"

    return _speak_to_file(spoken, lang, prefix, run_id), spoken
# Processing modes offered to the user; generate_summary() dispatches on them.
MODE_CHOICES = ['resume', 'traduction', 'resume_et_traduire', 'lecture']

# Language names offered as target/speech language. Each name is resolved
# through get_key(). 'hebrew' used to be listed twice, which rendered the same
# option twice; it now appears once.
LANGUAGE_CHOICES = [
    'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
    'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
    'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
    'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
    'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
    'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
    'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
    'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
    'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
    'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
    'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
    'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
    'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu',
]

# Web UI: one uploaded file plus two radio groups in, audio and text out.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(MODE_CHOICES, label="Choix du mode de fonctionnement"),
        gr.inputs.Radio(LANGUAGE_CHOICES, label="Langage à traduire"),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()
# GPS, or a GSM device that has GPS (150k, 15k)