adil9858 committed on
Commit
b245ae3
·
verified ·
1 Parent(s): b777d9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -72
app.py CHANGED
@@ -3,17 +3,17 @@ from openai import OpenAI
3
  import base64
4
  from PIL import Image
5
  import io
6
- from datetime import datetime
7
 
8
- # OpenAI client setup
9
  client = OpenAI(
10
  base_url="https://openrouter.ai/api/v1",
11
- api_key='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'
12
  )
13
 
14
  def analyze_image(image, prompt):
15
  if image is None:
16
- return "Please upload or capture an image first."
17
 
18
  # Convert image to base64
19
  buffered = io.BytesIO()
@@ -26,13 +26,7 @@ def analyze_image(image, prompt):
26
  messages=[
27
  {
28
  "role": "system",
29
- "content": """You are Dalton, an expert AI assistant specialized in image understanding.
30
- Your tasks include:
31
- - Extracting and structuring text from images
32
- - Answering questions about image content
33
- - Providing detailed descriptions
34
- - Analyzing receipts, documents, and other visual content
35
- Be thorough, accurate, and helpful in your responses."""
36
  },
37
  {
38
  "role": "user",
@@ -49,91 +43,70 @@ def analyze_image(image, prompt):
49
  ],
50
  max_tokens=2048
51
  )
52
-
53
- result = response.choices[0].message.content
54
- return result
55
-
56
  except Exception as e:
57
- return f"An error occurred: {str(e)}"
58
 
59
- # Custom CSS for better mobile experience
60
  css = """
61
- #mobile-camera { width: 100% !important; }
62
- #prompt-textbox { min-height: 100px !important; }
63
- .result-box {
64
- max-height: 500px;
65
- overflow-y: auto;
66
- padding: 15px;
67
- border: 1px solid #e0e0e0;
68
- border-radius: 8px;
69
- }
70
- .footer {
71
- margin-top: 20px;
72
- font-size: 12px;
73
- color: #666;
74
- text-align: center;
75
  }
76
  """
77
 
78
- with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
79
  gr.Markdown("""
80
- # 🧾 DaltonVision - InternVL3-14B
81
- ### Advanced Image Understanding β€’ Powered by OpenRouter β€’ Developed by [Koshur AI](https://koshurai.com)
82
  """)
83
 
84
- with gr.Row():
85
- with gr.Column():
86
- # Image input section
87
- image_input = gr.Image(
88
- sources=["upload", "webcam"],
89
  type="pil",
90
- label="Upload or Capture Image",
91
- elem_id="mobile-camera"
 
 
92
  )
93
 
94
- # Prompt input
95
- prompt_input = gr.Textbox(
96
- label="πŸ“ Enter your question or instruction",
97
- value="Extract all content structurally",
98
- lines=3,
99
- elem_id="prompt-textbox"
100
  )
101
 
102
- submit_btn = gr.Button("πŸ” Analyze Image", variant="primary")
103
 
104
  gr.Examples(
105
  examples=[
106
- ["What is the total amount on this receipt?"],
107
- ["List all items and their prices"],
108
- ["Who is the vendor and what is the date?"],
109
- ["Describe this image in detail"]
110
  ],
111
- inputs=[prompt_input],
112
- label="πŸ’‘ Try these example prompts:"
113
  )
114
 
115
- with gr.Column():
116
- # Result output
117
- result_output = gr.Markdown(
118
- label="βœ… Analysis Result",
119
- elem_classes="result-box"
 
120
  )
121
 
122
- # Footer
123
- gr.Markdown("""
124
- <div class="footer">
125
- Β© 2025 Koshur AI. All rights reserved.<br>
126
- Note: Images are processed in real-time and not stored.
127
- </div>
128
- """)
129
-
130
- # Button action
131
  submit_btn.click(
132
  fn=analyze_image,
133
- inputs=[image_input, prompt_input],
134
- outputs=result_output
135
  )
136
 
137
- # Launch the app
138
  if __name__ == "__main__":
139
- demo.launch()
 
3
  import base64
4
  from PIL import Image
5
  import io
6
+ import os
7
 
8
+ # Initialize OpenAI client with environment variable for API key
9
  client = OpenAI(
10
  base_url="https://openrouter.ai/api/v1",
11
+ api_key=os.environ.get("OPENROUTER_API_KEY")
12
  )
13
 
14
  def analyze_image(image, prompt):
15
  if image is None:
16
+ return "Please capture or upload an image first."
17
 
18
  # Convert image to base64
19
  buffered = io.BytesIO()
 
26
  messages=[
27
  {
28
  "role": "system",
29
+ "content": """You are an expert AI assistant specialized in image understanding."""
 
 
 
 
 
 
30
  },
31
  {
32
  "role": "user",
 
43
  ],
44
  max_tokens=2048
45
  )
46
+ return response.choices[0].message.content
 
 
 
47
  except Exception as e:
48
+ return f"Error: {str(e)}"
49
 
50
+ # Custom CSS for mobile optimization
51
  css = """
52
+ #camera-input {width: 100% !important;}
53
+ #camera-preview {max-width: 100%; margin: 0 auto;}
54
+ @media (max-width: 768px) {
55
+ #col-left {padding: 10px !important;}
56
+ #col-right {padding: 10px !important;}
 
 
 
 
 
 
 
 
 
57
  }
58
  """
59
 
60
+ with gr.Blocks(css=css, title="DaltonVision") as demo:
61
  gr.Markdown("""
62
+ # πŸ“Έ DaltonVision - Camera Analysis
63
+ ### Capture, Upload & Analyze Images with AI
64
  """)
65
 
66
+ with gr.Row(equal_height=True):
67
+ with gr.Column(elem_id="col-left"):
68
+ # Camera component with larger preview
69
+ camera = gr.Image(
70
+ sources=["webcam", "upload"],
71
  type="pil",
72
+ label="Take a picture or upload",
73
+ elem_id="camera-input",
74
+ interactive=True,
75
+ height=400
76
  )
77
 
78
+ prompt = gr.Textbox(
79
+ label="What would you like to know?",
80
+ placeholder="Describe this image...",
81
+ lines=3
 
 
82
  )
83
 
84
+ submit_btn = gr.Button("Analyze", variant="primary")
85
 
86
  gr.Examples(
87
  examples=[
88
+ ["What's written in this document?"],
89
+ ["Describe this scene in detail"],
90
+ ["Extract all text from this image"]
 
91
  ],
92
+ inputs=[prompt],
93
+ label="Try these prompts:"
94
  )
95
 
96
+ with gr.Column(elem_id="col-right"):
97
+ output = gr.Textbox(
98
+ label="Analysis Results",
99
+ interactive=False,
100
+ lines=15,
101
+ show_copy_button=True
102
  )
103
 
 
 
 
 
 
 
 
 
 
104
  submit_btn.click(
105
  fn=analyze_image,
106
+ inputs=[camera, prompt],
107
+ outputs=output
108
  )
109
 
110
+ # For Hugging Face Spaces deployment
111
  if __name__ == "__main__":
112
+ demo.launch(show_api=False)