Dy100 committed on
Commit
1d52f21
·
verified ·
1 Parent(s): 3f0b2d5

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +2 -8
  2. app.py +93 -0
  3. requirement.txt +4 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Visionbuddy
3
- emoji: 🐨
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.39.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: visionbuddy
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.39.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import (
3
+ PaliGemmaProcessor,
4
+ PaliGemmaForConditionalGeneration,
5
+ )
6
+ import torch
7
+ from PIL import Image
8
+ import numpy as np
9
+
10
# Device: use CUDA when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load model and processor from the same checkpoint id.
model_id = "google/paligemma2-3b-mix-448"
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
)
# eval() returns the module itself, so rebinding keeps `model` unchanged.
model = model.eval()
processor = PaliGemmaProcessor.from_pretrained(model_id)
print("Model and processor loaded successfully")
24
+
25
+ # Process image
26
def process_image(image, task_type, question="", objects=""):
    """Run one PaliGemma task on an image and return the decoded text.

    Args:
        image: The picture to analyse, as a NumPy array or a PIL image.
            ``None`` means nothing was uploaded.
        task_type: One of "Describe Image", "OCR Text Recognition",
            "Answer Question" or "Detect Objects".
        question: Question text; only used for "Answer Question".
        objects: Object names to look for; only used for "Detect Objects".

    Returns:
        The generated text on success. Otherwise a human-readable message
        explaining which input is missing/invalid or which error occurred.
    """
    # Tolerate None and surrounding whitespace from the text inputs.
    question = (question or "").strip()
    objects = (objects or "").strip()

    if task_type == "Describe Image":
        prompt = "describe en"
    elif task_type == "OCR Text Recognition":
        prompt = "ocr"
    elif task_type == "Answer Question":
        if not question:
            # Avoid sending a dangling "answer en " prompt to the model.
            return "Please enter a question."
        prompt = f"answer en {question}"
    elif task_type == "Detect Objects":
        if not objects:
            # Avoid sending a dangling "detect " prompt to the model.
            return "Please list the objects to detect."
        prompt = f"detect {objects}"
    else:
        return "Please select a valid task."

    if image is None:
        # Give a clear message instead of an opaque processor error.
        return "Please upload an image first."

    try:
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        model_inputs = processor(text=prompt, images=image, return_tensors="pt")
        # Follow the device the model actually lives on (it is placed by
        # device_map="auto") rather than a separately computed device string.
        target_device = model.device
        model_inputs = {k: v.to(target_device) for k, v in model_inputs.items()}
        input_len = model_inputs["input_ids"].shape[-1]

        with torch.inference_mode():
            generation = model.generate(
                **model_inputs,
                max_new_tokens=100,
                do_sample=False,
            )
            # Drop the prompt tokens so only newly generated text is decoded.
            generation = generation[0][input_len:]
            result = processor.decode(generation, skip_special_tokens=True)

        return result
    except Exception as e:
        # UI boundary: report the failure as text in the result box.
        return f"Error during processing: {str(e)}"
58
+
59
# Stylesheet passed to gr.Blocks; at present it holds only blank lines.
custom_css = """

"""

# Gradio app
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("""<h1>PaliGemma 2 Visual AI Assistant</h1>""")

    with gr.Row():
        # Input column: image, task selector, task-specific text boxes, button.
        with gr.Column():
            image_input = gr.Image(
                label="Upload Image",
                elem_classes="image-preview",
            )
            task_type = gr.Radio(
                choices=[
                    "Describe Image",
                    "OCR Text Recognition",
                    "Answer Question",
                    "Detect Objects",
                ],
                label="Choose Task",
                value="Describe Image",
            )
            # Both text boxes start hidden; update_inputs reveals them.
            question_input = gr.Textbox(
                label="Question",
                placeholder="Type a question",
                visible=False,
            )
            objects_input = gr.Textbox(
                label="Objects to Detect",
                placeholder="e.g., cat; car",
                visible=False,
            )
            submit_btn = gr.Button("🔍 Analyze")

        # Output column: the text result.
        with gr.Column():
            output_text = gr.Textbox(label="Result", lines=10)

    def update_inputs(task):
        """Show only the extra text box that the selected task needs."""
        wants_question = task == "Answer Question"
        wants_objects = task == "Detect Objects"
        return {
            question_input: gr.update(visible=wants_question),
            objects_input: gr.update(visible=wants_objects),
        }

    task_type.change(
        fn=update_inputs,
        inputs=[task_type],
        outputs=[question_input, objects_input],
    )
    submit_btn.click(
        fn=process_image,
        inputs=[image_input, task_type, question_input, objects_input],
        outputs=output_text,
    )

if __name__ == "__main__":
    demo.launch(share=True, inbrowser=True)
requirement.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ Pillow
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ Pillow