Akbartus committed on
Commit
3bc38a4
·
1 Parent(s): ad65342

Create new file

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
2
+
3
+ import pytesseract as tsr
4
+ from PIL import Image
5
+
6
+ import sys, os
7
+ import gradio as gr
8
+
9
# Path to the tesseract executable used for OCR.
tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# The model weights and the tokenizer are loaded from the same checkpoint.
MODEL_NAME = "facebook/m2m100_418M"
model = M2M100ForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = M2M100Tokenizer.from_pretrained(MODEL_NAME)
13
+
14
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognised text to Uzbek.

    The text is read with Tesseract using the ``eng+uzb`` language packs,
    flattened to a single line, and translated by the module-level M2M100
    model with English set as the source language.

    Args:
        image: The image to read, in a form accepted by
            ``pytesseract.image_to_string``. ``None`` (no image supplied)
            is tolerated.

    Returns:
        The translated text, or an empty string when there is no image or
        OCR finds no text.
    """
    # Nothing to read: avoid handing None to pytesseract.
    if image is None:
        return ''

    # Extract text
    extractedText = tsr.image_to_string(image, lang='eng+uzb')
    # split() with no argument collapses every run of whitespace, so line
    # breaks, blank lines and other control whitespace in the OCR output
    # (e.g. a trailing form-feed page separator) never reach the translator.
    extractedTextFormatted = ' '.join(extractedText.split())

    # No recognisable text: skip the model instead of translating ''.
    if not extractedTextFormatted:
        return ''

    # Translate
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("uz"),
    )

    # A single input string yields a batch of one decoded string.
    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
25
+
26
# Web UI: a single image input whose result from extractAndTranslate is
# shown as text.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)
demoApp.launch()