Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,46 +1,15 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import VisionEncoderDecoderModel, TrOCRProcessor,
|
3 |
from PIL import Image
|
|
|
|
|
4 |
|
5 |
-
# Load the model and processor from Hugging Face
|
6 |
tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
|
7 |
model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
|
8 |
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
|
9 |
-
|
10 |
-
processor = TrOCRProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
|
11 |
-
|
12 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
13 |
-
model.to(device)
|
14 |
-
def preprocess_image(image):
    """Letterbox an image onto a 224x224 white RGB canvas.

    The image is scaled so that its longer side becomes 224 pixels while
    the aspect ratio is kept, then pasted centred on a white background.

    Args:
        image: A PIL image; its ``size`` (width, height) and ``resize``
            method are used.

    Returns:
        A new 224x224 ``RGB`` image containing the resized input.
    """
    target_size = (224, 224)
    original_size = image.size

    # Scale the longer side to the target and derive the other side from
    # the aspect ratio.
    aspect_ratio = original_size[0] / original_size[1]
    if aspect_ratio > 1:  # Width is greater than height
        new_width = target_size[0]
        # Clamp to 1 pixel: for very wide images the truncated height is 0,
        # and resizing to a zero dimension fails.
        new_height = max(1, int(target_size[0] / aspect_ratio))
    else:  # Height is greater than or equal to width
        new_height = target_size[1]
        # Same clamp for very tall images, whose truncated width is 0.
        new_width = max(1, int(target_size[1] * aspect_ratio))

    resized_img = image.resize((new_width, new_height))

    # Split the leftover space evenly so the resized image sits centred.
    pad_left = (target_size[0] - new_width) // 2
    pad_top = (target_size[1] - new_height) // 2

    pad_image = Image.new('RGB', target_size, (255, 255, 255))  # White background
    pad_image.paste(resized_img, (pad_left, pad_top))

    return pad_image
|
42 |
-
|
43 |
|
|
|
44 |
def predict(image):
|
45 |
# Preprocess the image
|
46 |
image = Image.open(image).convert("RGB")
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import VisionEncoderDecoderModel, TrOCRProcessor,AutoTokenizer,ViTFeatureExtractor
|
3 |
from PIL import Image
|
4 |
+
import torch
|
5 |
+
|
6 |
|
|
|
7 |
tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
|
8 |
model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
|
9 |
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
|
|
|
|
|
|
|
10 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
model.to(device)
|
13 |
def predict(image):
|
14 |
# Preprocess the image
|
15 |
image = Image.open(image).convert("RGB")
|