Akbartus committed on
Commit
6c59ea1
·
1 Parent(s): fb7548b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -8,18 +8,18 @@ import gradio as gr
8
 
9
  tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
10
 
11
- model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
12
- tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
13
 
14
  def extractAndTranslate(image):
15
  # Extract Text
16
- extractedText = tsr.image_to_string(image, lang='eng+uzb')
17
  extractedTextFormatted = ' '.join(extractedText.split('\n'))
18
 
19
  # Translate
20
  tokenizer.src_lang = "en"
21
  encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
22
- generatedTokens = model.generate(**encodedText, forced_bos_token_id=tokenizer.get_lang_id("uz"))
23
 
24
  return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
25
 
 
8
 
9
  tsr.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
10
 
11
+ model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
12
+ tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
13
 
14
  def extractAndTranslate(image):
15
  # Extract Text
16
+ extractedText = tsr.image_to_string(image, lang='eng+rus')
17
  extractedTextFormatted = ' '.join(extractedText.split('\n'))
18
 
19
  # Translate
20
  tokenizer.src_lang = "en"
21
  encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
22
+ generatedTokens = model.generate(**encodedText, forced_bos_token_id=tokenizer.get_lang_id("ru"))
23
 
24
  return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
25