Akbartus's picture
Create new file
3bc38a4
raw
history blame
931 Bytes
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import pytesseract as tsr
from PIL import Image
import sys, os
import gradio as gr
# Path of the Tesseract executable that pytesseract should invoke.
tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# The tokenizer and the translation model are loaded from the same checkpoint.
_CHECKPOINT = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(_CHECKPOINT)
model = M2M100ForConditionalGeneration.from_pretrained(_CHECKPOINT)
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognized text to Uzbek.

    Text is read with Tesseract using the combined ``eng+uzb`` language
    setting, flattened onto a single line, and translated with the
    module-level M2M100 ``model`` / ``tokenizer`` using ``"en"`` as the
    source language and ``"uz"`` as the forced target language.

    Args:
        image: Image to read, in any form accepted by
            ``pytesseract.image_to_string``. ``None`` is accepted
            (presumably what the UI passes when nothing was uploaded) and
            yields an empty string.

    Returns:
        The translated text, or ``""`` when no image was given or OCR
        found no text.
    """
    # Nothing to OCR; handing None to pytesseract would fail instead of
    # giving the user an empty result.
    if image is None:
        return ""

    # Extract text
    ocr_output = tsr.image_to_string(image, lang='eng+uzb')
    # split() without an argument collapses every whitespace run (newlines,
    # tabs, any trailing page-separator character) and drops leading and
    # trailing blanks, so the translator receives one clean line.
    source_text = ' '.join(ocr_output.split())
    if not source_text:
        # No recognizable text: skip the model call, there is nothing to
        # translate.
        return ""

    # Translate
    tokenizer.src_lang = "en"
    encoded = tokenizer(source_text, return_tensors="pt")
    generated = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id("uz"),
    )
    # A single input string produces a batch of one; return its only entry.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
# Web UI: one image input and one text output, served by extractAndTranslate.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)
demoApp.launch()