paudelanil committed on
Commit
211ccfb
·
verified ·
1 Parent(s): 8b27076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -35
app.py CHANGED
@@ -1,46 +1,15 @@
1
  import gradio as gr
2
- from transformers import VisionEncoderDecoderModel, TrOCRProcessor, AutoTokenizer
3
  from PIL import Image
 
 
4
 
5
- # Load the model and processor from Hugging Face
6
  tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
7
  model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
8
  feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
9
-
10
- processor = TrOCRProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
11
-
12
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
13
- model.to(device)
14
- def preprocess_image(image):
15
- # Resize while maintaining aspect ratio
16
- target_size = (224, 224)
17
- original_size = image.size
18
-
19
- # Calculate the new size while maintaining aspect ratio
20
- aspect_ratio = original_size[0] / original_size[1]
21
- if aspect_ratio > 1: # Width is greater than height
22
- new_width = target_size[0]
23
- new_height = int(target_size[0] / aspect_ratio)
24
- else: # Height is greater than width
25
- new_height = target_size[1]
26
- new_width = int(target_size[1] * aspect_ratio)
27
-
28
- # Resize the image
29
- resized_img = image.resize((new_width, new_height))
30
-
31
- # Calculate padding values
32
- padding_width = target_size[0] - new_width
33
- padding_height = target_size[1] - new_height
34
-
35
- # Apply padding to center the resized image
36
- pad_left = padding_width // 2
37
- pad_top = padding_height // 2
38
- pad_image = Image.new('RGB', target_size, (255, 255, 255)) # White background
39
- pad_image.paste(resized_img, (pad_left, pad_top))
40
-
41
- return pad_image
42
-
43
 
 
44
  def predict(image):
45
  # Preprocess the image
46
  image = Image.open(image).convert("RGB")
 
1
  import gradio as gr
2
+ from transformers import VisionEncoderDecoderModel, TrOCRProcessor,AutoTokenizer,ViTFeatureExtractor
3
  from PIL import Image
4
+ import torch
5
+
6
 
 
7
  tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
8
  model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
9
  feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
 
 
 
10
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ model.to(device)
13
  def predict(image):
14
  # Preprocess the image
15
  image = Image.open(image).convert("RGB")