Spaces:

ProfessorLeVesseur
/

VisionTexts

Sleeping

App Files Community

ProfessorLeVesseur committed on Nov 20, 2024

Commit

d5a2742

verified ·

1 Parent(s): 1cc3f83

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -55

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import streamlit as st
 import base64
 from huggingface_hub import InferenceClient
 # Function to read the image file and return a base64-encoded string
@@ -16,7 +17,7 @@ st.image('MTSS.ai_Logo.png', width=image_width)
 st.header('VisionTexts™ | Accessibility')
 st.subheader('Image Alt Text Creator')
-# Initialize the Hugging Face InferenceClient with the API key from Streamlit secrets
 client = InferenceClient(api_key=st.secrets["huggingface_api_key"])
 # File uploader
@@ -35,7 +36,6 @@ if show_details:
     # Text input for additional details about the image
     additional_details = st.text_area(
         "The details could include specific information that is important to include in the alt text or reflect why the image is being used:",
-        disabled=not show_details
     )
 # Toggle for modifying the prompt for complex images
@@ -49,7 +49,7 @@ if complex_image:
     )
 # Button to trigger the analysis
-analyze_button = st.button("Analyze the Image", type="secondary")
 # Optimized prompt for complex images
 complex_image_prompt_text = (
@@ -64,59 +64,69 @@ if uploaded_file is not None and analyze_button:
     with st.spinner("Analyzing the image ..."):
         # Get base64-encoded image string
-        base64_image_string = get_image_base64(uploaded_file)
-        # Determine which prompt to use based on the complexity of the image
-        if complex_image:
-            prompt_text = complex_image_prompt_text
         else:
-            prompt_text = (
-                "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
-                "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
-                "Skip phrases like 'image of' or 'picture of.' "
-                "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
-            )
-        if show_details and additional_details:
-            prompt_text += (
-                f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
-            )
-        # Create the payload for the completion request
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt_text},
-                    {
-                        "type": "image",
-                        "image": {
-                            # Provide the base64-encoded image string
-                            "bytes": base64_image_string
-                        },
-                    },
-                ],
-            }
-        ]
-        # Make the request to the Hugging Face API
-        try:
-            # Send the request to the model
-            completion = client.chat.completions.create(
-                model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-                messages=messages,
-                max_tokens=500
-            )
-            # Extract the assistant's response
-            assistant_response = completion.choices[0].message['content']
-            # Display the response
-            st.markdown(assistant_response)
-            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button:

 import streamlit as st
 import base64
+import json
 from huggingface_hub import InferenceClient
 # Function to read the image file and return a base64-encoded string
 st.header('VisionTexts™ | Accessibility')
 st.subheader('Image Alt Text Creator')
+# Initialize the Hugging Face InferenceClient with the API key from secrets
 client = InferenceClient(api_key=st.secrets["huggingface_api_key"])
 # File uploader
     # Text input for additional details about the image
     additional_details = st.text_area(
         "The details could include specific information that is important to include in the alt text or reflect why the image is being used:",
     )
 # Toggle for modifying the prompt for complex images
     )
 # Button to trigger the analysis
+analyze_button = st.button("Analyze the Image")
 # Optimized prompt for complex images
 complex_image_prompt_text = (
     with st.spinner("Analyzing the image ..."):
         # Get base64-encoded image string
+        image_bytes = uploaded_file.read()
+        base64_image_string = base64.b64encode(image_bytes).decode('utf-8')
+        # Detect the image content type
+        import imghdr
+        image_type = imghdr.what(None, h=image_bytes)
+        if image_type is None:
+            st.error("Unsupported image type. Please upload a JPEG or PNG image.")
         else:
+            content_type = f"image/{image_type}"
+            # Determine which prompt to use based on the complexity of the image
+            if complex_image:
+                prompt_text = complex_image_prompt_text
+            else:
+                prompt_text = (
+                    "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
+                    "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
+                    "Skip phrases like 'image of' or 'picture of.' "
+                    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
+                )
+            if show_details and additional_details:
+                prompt_text += (
+                    f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
+                )
+            # Create the payload for the completion request
+            messages = [
+                {
+                    "role": "user",
+                    "content": prompt_text,
+                }
+            ]
+            # Attachments array containing the image
+            attachments = [
+                {
+                    "type": "image",
+                    "content": base64_image_string,
+                    "content_type": content_type,
+                }
+            ]
+            # Make the request to the Hugging Face API
+            try:
+                # Send the request to the model
+                completion = client.chat.completions.create(
+                    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+                    messages=messages,
+                    attachments=attachments,
+                    max_tokens=500
+                )
+                # Extract the assistant's response
+                assistant_response = completion.choices[0].message['content']
+                # Display the response
+                st.markdown(assistant_response)
+                st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
+            except Exception as e:
+                st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button: