Akbartus's picture
Update app.py
da6e6a5
raw
history blame
927 Bytes
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import pytesseract as tsr
from PIL import Image
import sys, os
import gradio as gr
# Tell pytesseract where the Tesseract executable lives.
tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# The translation model and its tokenizer come from the same checkpoint,
# so the identifier is spelled out once and shared by both loaders.
_CHECKPOINT = "facebook/m2m100_1.2B"
model = M2M100ForConditionalGeneration.from_pretrained(_CHECKPOINT)
tokenizer = M2M100Tokenizer.from_pretrained(_CHECKPOINT)
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognized text to Russian.

    The text is read with Tesseract's English language data, flattened to a
    single line, and translated with the module-level M2M100 ``model`` and
    ``tokenizer`` (source language ``"en"``, target language ``"ru"``).

    Args:
        image: The image handed over by the UI; it is passed unchanged to
            ``pytesseract.image_to_string``. ``None`` (no image supplied)
            is accepted and treated as "nothing to translate".

    Returns:
        The translated text, or an empty string when no image was given or
        the OCR step recognized no text.
    """
    # A submission without an image arrives as None; there is nothing to
    # OCR, so answer with an empty result instead of failing in pytesseract.
    if image is None:
        return ""

    # Extract text
    extractedText = tsr.image_to_string(image, lang='eng')
    # Collapse every whitespace run into one space. Splitting only on '\n'
    # would leave tabs, repeated blanks and the form-feed page separator
    # that Tesseract may emit in the text fed to the tokenizer.
    extractedTextFormatted = ' '.join(extractedText.split())
    if not extractedTextFormatted:
        # No recognizable text: skip the model rather than translating an
        # empty prompt.
        return ""

    # Translate
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("ru"),
    )
    # A single input sequence was encoded, so the first decoded entry is
    # the whole translation.
    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
# Web UI: a single image input wired to extractAndTranslate, with the
# returned string shown as text. The server starts as soon as this runs.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)
demoApp.launch()