adil9858 committed on
Commit
b245ae3
·
verified ·
1 Parent(s): b777d9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -72
app.py CHANGED
@@ -3,17 +3,17 @@ from openai import OpenAI
3
  import base64
4
  from PIL import Image
5
  import io
6
- from datetime import datetime
7
 
8
- # OpenAI client setup
9
  client = OpenAI(
10
  base_url="https://openrouter.ai/api/v1",
11
- api_key='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'
12
  )
13
 
14
  def analyze_image(image, prompt):
15
  if image is None:
16
- return "Please upload or capture an image first."
17
 
18
  # Convert image to base64
19
  buffered = io.BytesIO()
@@ -26,13 +26,7 @@ def analyze_image(image, prompt):
26
  messages=[
27
  {
28
  "role": "system",
29
- "content": """You are Dalton, an expert AI assistant specialized in image understanding.
30
- Your tasks include:
31
- - Extracting and structuring text from images
32
- - Answering questions about image content
33
- - Providing detailed descriptions
34
- - Analyzing receipts, documents, and other visual content
35
- Be thorough, accurate, and helpful in your responses."""
36
  },
37
  {
38
  "role": "user",
@@ -49,91 +43,70 @@ def analyze_image(image, prompt):
49
  ],
50
  max_tokens=2048
51
  )
52
-
53
- result = response.choices[0].message.content
54
- return result
55
-
56
  except Exception as e:
57
- return f"An error occurred: {str(e)}"
58
 
59
- # Custom CSS for better mobile experience
60
  css = """
61
- #mobile-camera { width: 100% !important; }
62
- #prompt-textbox { min-height: 100px !important; }
63
- .result-box {
64
- max-height: 500px;
65
- overflow-y: auto;
66
- padding: 15px;
67
- border: 1px solid #e0e0e0;
68
- border-radius: 8px;
69
- }
70
- .footer {
71
- margin-top: 20px;
72
- font-size: 12px;
73
- color: #666;
74
- text-align: center;
75
  }
76
  """
77
 
78
- with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
79
  gr.Markdown("""
80
- # 🧾 DaltonVision - InternVL3-14B
81
- ### Advanced Image Understanding β€’ Powered by OpenRouter β€’ Developed by [Koshur AI](https://koshurai.com)
82
  """)
83
 
84
- with gr.Row():
85
- with gr.Column():
86
- # Image input section
87
- image_input = gr.Image(
88
- sources=["upload", "webcam"],
89
  type="pil",
90
- label="Upload or Capture Image",
91
- elem_id="mobile-camera"
 
 
92
  )
93
 
94
- # Prompt input
95
- prompt_input = gr.Textbox(
96
- label="πŸ“ Enter your question or instruction",
97
- value="Extract all content structurally",
98
- lines=3,
99
- elem_id="prompt-textbox"
100
  )
101
 
102
- submit_btn = gr.Button("πŸ” Analyze Image", variant="primary")
103
 
104
  gr.Examples(
105
  examples=[
106
- ["What is the total amount on this receipt?"],
107
- ["List all items and their prices"],
108
- ["Who is the vendor and what is the date?"],
109
- ["Describe this image in detail"]
110
  ],
111
- inputs=[prompt_input],
112
- label="πŸ’‘ Try these example prompts:"
113
  )
114
 
115
- with gr.Column():
116
- # Result output
117
- result_output = gr.Markdown(
118
- label="βœ… Analysis Result",
119
- elem_classes="result-box"
 
120
  )
121
 
122
- # Footer
123
- gr.Markdown("""
124
- <div class="footer">
125
- Β© 2025 Koshur AI. All rights reserved.<br>
126
- Note: Images are processed in real-time and not stored.
127
- </div>
128
- """)
129
-
130
- # Button action
131
  submit_btn.click(
132
  fn=analyze_image,
133
- inputs=[image_input, prompt_input],
134
- outputs=result_output
135
  )
136
 
137
- # Launch the app
138
  if __name__ == "__main__":
139
- demo.launch()
 
3
  import base64
4
  from PIL import Image
5
  import io
6
+ import os
7
 
8
+ # Initialize OpenAI client with environment variable for API key
9
  client = OpenAI(
10
  base_url="https://openrouter.ai/api/v1",
11
+ api_key=os.environ.get("OPENROUTER_API_KEY")
12
  )
13
 
14
  def analyze_image(image, prompt):
15
  if image is None:
16
+ return "Please capture or upload an image first."
17
 
18
  # Convert image to base64
19
  buffered = io.BytesIO()
 
26
  messages=[
27
  {
28
  "role": "system",
29
+ "content": """You are an expert AI assistant specialized in image understanding."""
 
 
 
 
 
 
30
  },
31
  {
32
  "role": "user",
 
43
  ],
44
  max_tokens=2048
45
  )
46
+ return response.choices[0].message.content
 
 
 
47
  except Exception as e:
48
+ return f"Error: {str(e)}"
49
 
50
+ # Custom CSS for mobile optimization
51
  css = """
52
+ #camera-input {width: 100% !important;}
53
+ #camera-preview {max-width: 100%; margin: 0 auto;}
54
+ @media (max-width: 768px) {
55
+ #col-left {padding: 10px !important;}
56
+ #col-right {padding: 10px !important;}
 
 
 
 
 
 
 
 
 
57
  }
58
  """
59
 
60
+ with gr.Blocks(css=css, title="DaltonVision") as demo:
61
  gr.Markdown("""
62
+ # πŸ“Έ DaltonVision - Camera Analysis
63
+ ### Capture, Upload & Analyze Images with AI
64
  """)
65
 
66
+ with gr.Row(equal_height=True):
67
+ with gr.Column(elem_id="col-left"):
68
+ # Camera component with larger preview
69
+ camera = gr.Image(
70
+ sources=["webcam", "upload"],
71
  type="pil",
72
+ label="Take a picture or upload",
73
+ elem_id="camera-input",
74
+ interactive=True,
75
+ height=400
76
  )
77
 
78
+ prompt = gr.Textbox(
79
+ label="What would you like to know?",
80
+ placeholder="Describe this image...",
81
+ lines=3
 
 
82
  )
83
 
84
+ submit_btn = gr.Button("Analyze", variant="primary")
85
 
86
  gr.Examples(
87
  examples=[
88
+ ["What's written in this document?"],
89
+ ["Describe this scene in detail"],
90
+ ["Extract all text from this image"]
 
91
  ],
92
+ inputs=[prompt],
93
+ label="Try these prompts:"
94
  )
95
 
96
+ with gr.Column(elem_id="col-right"):
97
+ output = gr.Textbox(
98
+ label="Analysis Results",
99
+ interactive=False,
100
+ lines=15,
101
+ show_copy_button=True
102
  )
103
 
 
 
 
 
 
 
 
 
 
104
  submit_btn.click(
105
  fn=analyze_image,
106
+ inputs=[camera, prompt],
107
+ outputs=output
108
  )
109
 
110
+ # For Hugging Face Spaces deployment
111
  if __name__ == "__main__":
112
+ demo.launch(show_api=False)