Spaces:

shukdevdatta123
/

MedicineOCR

Running

App Files Files Community

shukdevdatta123 commited on Apr 23

Commit

c7cc8ee

verified ·

1 Parent(s): c447daa

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -82

app.py CHANGED Viewed

@@ -1,38 +1,15 @@
 import gradio as gr
-from transformers.image_utils import load_image
-from threading import Thread
-import time
 import torch
 from PIL import Image
 from transformers import (
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
-# ---------------------------
-# Helper Functions
-# ---------------------------
-def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
-    """
-    Returns an HTML snippet for a thin animated progress bar with a label.
-    """
-    return f'''
-<div style="display: flex; align-items: center;">
-    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
-    <div style="width: 110px; height: 5px; background-color: {secondary_color}; border-radius: 2px; overflow: hidden;">
-        <div style="width: 100%; height: 100%; background-color: {primary_color}; animation: loading 1.5s linear infinite;"></div>
-    </div>
-</div>
-<style>
-@keyframes loading {{
-    0% {{ transform: translateX(-100%); }}
-    100% {{ transform: translateX(100%); }}
-}}
-</style>
-    '''
-# Model and Processor Setup - CPU version
 MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -41,23 +18,18 @@ model = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float32  # Using float32 for CPU compatibility
 ).to("cpu").eval()
-# Main Inference Function
-def extract_medicines(image_files):
     """Extract medicine names from prescription images."""
-    if not image_files:
         return "Please upload a prescription image."
-    # Handle file inputs correctly
-    image_paths = [file.name for file in image_files] if isinstance(image_files, list) else [image_files.name]
-    images = [load_image(path) for path in image_paths]
-    # Specific prompt to extract only medicine names
     text = "Extract ONLY the names of medications/medicines from this prescription image. Format the output as a numbered list of medicine names only, without dosages or instructions."
     messages = [{
         "role": "user",
         "content": [
-            *[{"type": "image", "image": image} for image in images],
             {"type": "text", "text": text},
         ],
     }]
@@ -65,59 +37,34 @@ def extract_medicines(image_files):
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
         text=[prompt_full],
-        images=images,
         return_tensors="pt",
         padding=True,
     ).to("cpu")
-    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    buffer = ""
-    yield progress_bar_html("Extracting Medicine Names")
-    for new_text in streamer:
-        buffer += new_text
-        buffer = buffer.replace("<|im_end|>", "")
-        time.sleep(0.01)
-        yield buffer
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Medicine Name Extractor")
-    gr.Markdown("Upload prescription images to extract medicine names")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.File(
-                label="Upload Prescription Image(s)",
-                file_count="multiple",
-                file_types=["image"]
-            )
-            extract_btn = gr.Button("Extract Medicine Names", variant="primary")
-        with gr.Column():
-            output = gr.Markdown(label="Extracted Medicine Names")
-    extract_btn.click(
-        fn=extract_medicines,
-        inputs=image_input,
-        outputs=output
-    )
-    # Note: For examples to work with current Gradio versions, you need a different approach
-    # than what I previously provided. Remove examples for now to fix the immediate error.
-    gr.Markdown("""
-    ### Notes:
-    - This app is optimized to run on CPU
-    - Upload clear images of prescriptions for best results
-    - Only medicine names will be extracted
-    - Processing might take a minute or two on CPU
-    """)
-demo.queue()
-demo.launch(debug=True)

 import gradio as gr
 import torch
 from PIL import Image
+import time
+from threading import Thread
 from transformers import (
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
+# Load model and processor - CPU version
 MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float32  # Using float32 for CPU compatibility
 ).to("cpu").eval()
+def extract_medicines(image):
     """Extract medicine names from prescription images."""
+    if image is None:
         return "Please upload a prescription image."
+    # Process the image
     text = "Extract ONLY the names of medications/medicines from this prescription image. Format the output as a numbered list of medicine names only, without dosages or instructions."
     messages = [{
         "role": "user",
         "content": [
+            {"type": "image", "image": Image.open(image)},
             {"type": "text", "text": text},
         ],
     }]
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
         text=[prompt_full],
+        images=[Image.open(image)],
         return_tensors="pt",
         padding=True,
     ).to("cpu")
+    # Generate response
+    with torch.no_grad():
+        output = model.generate(**inputs, max_new_tokens=512)
+    # Decode and return response
+    response = processor.decode(output[0], skip_special_tokens=True)
+    # Clean up the response to get just the model's answer
+    if "<|assistant|>" in response:
+        response = response.split("<|assistant|>")[1].strip()
+    return response
+# Create a simple Gradio interface
+demo = gr.Interface(
+    fn=extract_medicines,
+    inputs=gr.Image(type="filepath", label="Upload Prescription Image"),
+    outputs=gr.Textbox(label="Extracted Medicine Names"),
+    title="Medicine Name Extractor",
+    description="Upload prescription images to extract medicine names",
+    examples=[["examples/prescription1.jpg"]], # Update with your actual example paths or remove if not available
+    cache_examples=True,
+)
+if __name__ == "__main__":
+    demo.launch(debug=True)