Odulana Hammed committed on
Commit
e0dc9e6
·
verified ·
1 Parent(s): 0020f51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -36
app.py CHANGED
@@ -1,49 +1,51 @@
1
- import gradio as gr
2
- from transformers import AutoProcessor, MllamaForConditionalGeneration
3
  from PIL import Image
4
  import torch
5
- import time
6
  import spaces
7
 
8
- # Load Vision-Instruct model
9
  ckpt = "alpindale/Llama-3.2-11B-Vision-Instruct"
10
- model = MllamaForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16).to("cuda")
 
 
 
11
  processor = AutoProcessor.from_pretrained(ckpt)
12
 
13
- # Define the function to extract text from the image
14
  @spaces.GPU
15
- def extract_text_from_image(image, max_new_tokens=250):
16
- """
17
- Extract handwritten text from the image using Meta-Llama Vision-Instruct.
18
- """
19
- try:
20
- # Process the image
21
- inputs = processor(images=image, return_tensors="pt").to("cuda")
22
-
23
- # Generate the prediction
24
- outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
25
-
26
- # Decode the generated text
27
- extracted_text = processor.decode(outputs[0], skip_special_tokens=True)
28
-
29
- return extracted_text
30
- except Exception as e:
31
- return f"An error occurred: {str(e)}"
32
-
33
- # Define Gradio interface for image upload and text extraction
34
- title = "Handwritten Text Extraction"
35
- description = """
36
- Upload an image with handwritten text, and this app will use Meta-Llama Vision-Instruct to extract the text.
37
- """
 
38
 
 
39
  demo = gr.Interface(
40
- fn=extract_text_from_image,
41
- inputs=gr.Image(type="pil", label="Upload Handwritten Image"),
42
  outputs=gr.Textbox(label="Extracted Text"),
43
- title=title,
44
- description=description,
45
- live=False # Disable live updates since the extraction will happen after the user submits
46
  )
47
 
48
- if __name__ == "__main__":
49
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
 
2
  from PIL import Image
3
  import torch
4
+ import gradio as gr
5
  import spaces
6
 
7
# Checkpoint identifier shared by the model weights and their processor.
ckpt = "alpindale/Llama-3.2-11B-Vision-Instruct"

# Load the weights in bfloat16 first, then move the model onto the GPU.
model = MllamaForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16)
model = model.to("cuda")

# The processor is loaded from the same checkpoint as the model.
processor = AutoProcessor.from_pretrained(ckpt)
14
 
 
@spaces.GPU
def extract_text(image):
    """Transcribe the handwritten text found in an uploaded image.

    Args:
        image: Path to the image file, as delivered by the ``gr.Image``
            input configured with ``type="filepath"``.

    Returns:
        The text the model generated in answer to the extraction prompt.
        The echoed prompt is not included.

    Raises:
        gr.Error: If the request was submitted without an image.
    """
    # Submitting the form with no upload hands us None; report that to the
    # user instead of failing inside Image.open.
    if not image:
        raise gr.Error("Please upload an image before submitting.")

    # convert() returns a fully loaded copy, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(image) as source:
        rgb_image = source.convert("RGB")

    # Single-turn conversation: the instruction plus an image placeholder.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract handwritten text from the image"},
                {"type": "image"},
            ],
        }
    ]

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

    # The chat template already emits the begin-of-text token, so the
    # tokenizer must not prepend a second one.
    inputs = processor(
        text=prompt,
        images=[rgb_image],
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    outputs = model.generate(**inputs, max_new_tokens=250)

    # generate() returns the prompt tokens followed by the completion;
    # decode only the completion so the prompt is not echoed to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    result = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    return result
40
 
41
# Build the UI pieces first: one image upload in, one text box out.
_image_input = gr.Image(type="filepath", label="Upload Image")
_text_output = gr.Textbox(label="Extracted Text")

# Wire the components to the extraction function.
demo = gr.Interface(
    fn=extract_text,
    inputs=_image_input,
    outputs=_text_output,
    title="Handwritten Text Extractor",
    description="Upload an image containing handwritten text to extract its content.",
)

# Start the app with Gradio's debug flag enabled.
demo.launch(debug=True)