ProfessorLeVesseur committed on
Commit
d5a2742
·
verified ·
1 Parent(s): 1cc3f83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -55
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  import base64
 
3
  from huggingface_hub import InferenceClient
4
 
5
  # Function to read the image file and return a base64-encoded string
@@ -16,7 +17,7 @@ st.image('MTSS.ai_Logo.png', width=image_width)
16
  st.header('VisionTexts™ | Accessibility')
17
  st.subheader('Image Alt Text Creator')
18
 
19
- # Initialize the Hugging Face InferenceClient with the API key from Streamlit secrets
20
  client = InferenceClient(api_key=st.secrets["huggingface_api_key"])
21
 
22
  # File uploader
@@ -35,7 +36,6 @@ if show_details:
35
  # Text input for additional details about the image
36
  additional_details = st.text_area(
37
  "The details could include specific information that is important to include in the alt text or reflect why the image is being used:",
38
- disabled=not show_details
39
  )
40
 
41
  # Toggle for modifying the prompt for complex images
@@ -49,7 +49,7 @@ if complex_image:
49
  )
50
 
51
  # Button to trigger the analysis
52
- analyze_button = st.button("Analyze the Image", type="secondary")
53
 
54
  # Optimized prompt for complex images
55
  complex_image_prompt_text = (
@@ -64,59 +64,69 @@ if uploaded_file is not None and analyze_button:
64
 
65
  with st.spinner("Analyzing the image ..."):
66
  # Get base64-encoded image string
67
- base64_image_string = get_image_base64(uploaded_file)
68
-
69
- # Determine which prompt to use based on the complexity of the image
70
- if complex_image:
71
- prompt_text = complex_image_prompt_text
 
 
 
72
  else:
73
- prompt_text = (
74
- "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
75
- "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
76
- "Skip phrases like 'image of' or 'picture of.' "
77
- "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
78
- )
79
-
80
- if show_details and additional_details:
81
- prompt_text += (
82
- f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
83
- )
84
-
85
- # Create the payload for the completion request
86
- messages = [
87
- {
88
- "role": "user",
89
- "content": [
90
- {"type": "text", "text": prompt_text},
91
- {
92
- "type": "image",
93
- "image": {
94
- # Provide the base64-encoded image string
95
- "bytes": base64_image_string
96
- },
97
- },
98
- ],
99
- }
100
- ]
101
-
102
- # Make the request to the Hugging Face API
103
- try:
104
- # Send the request to the model
105
- completion = client.chat.completions.create(
106
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
107
- messages=messages,
108
- max_tokens=500
109
- )
110
-
111
- # Extract the assistant's response
112
- assistant_response = completion.choices[0].message['content']
113
-
114
- # Display the response
115
- st.markdown(assistant_response)
116
-
117
- st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
118
- except Exception as e:
119
- st.error(f"An error occurred: {e}")
 
 
 
 
 
 
 
120
  else:
121
  # Warning for user action required
122
  if not uploaded_file and analyze_button:
 
1
  import streamlit as st
2
  import base64
3
+ import json
4
  from huggingface_hub import InferenceClient
5
 
6
  # Function to read the image file and return a base64-encoded string
 
17
  st.header('VisionTexts™ | Accessibility')
18
  st.subheader('Image Alt Text Creator')
19
 
20
+ # Initialize the Hugging Face InferenceClient with the API key from secrets
21
  client = InferenceClient(api_key=st.secrets["huggingface_api_key"])
22
 
23
  # File uploader
 
36
  # Text input for additional details about the image
37
  additional_details = st.text_area(
38
  "The details could include specific information that is important to include in the alt text or reflect why the image is being used:",
 
39
  )
40
 
41
  # Toggle for modifying the prompt for complex images
 
49
  )
50
 
51
  # Button to trigger the analysis
52
+ analyze_button = st.button("Analyze the Image")
53
 
54
  # Optimized prompt for complex images
55
  complex_image_prompt_text = (
 
64
 
65
  with st.spinner("Analyzing the image ..."):
66
  # Get base64-encoded image string
67
+ image_bytes = uploaded_file.read()
68
+ base64_image_string = base64.b64encode(image_bytes).decode('utf-8')
69
+
70
+ # Detect the image content type
71
+ import imghdr
72
+ image_type = imghdr.what(None, h=image_bytes)
73
+ if image_type is None:
74
+ st.error("Unsupported image type. Please upload a JPEG or PNG image.")
75
  else:
76
+ content_type = f"image/{image_type}"
77
+
78
+ # Determine which prompt to use based on the complexity of the image
79
+ if complex_image:
80
+ prompt_text = complex_image_prompt_text
81
+ else:
82
+ prompt_text = (
83
+ "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
84
+ "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
85
+ "Skip phrases like 'image of' or 'picture of.' "
86
+ "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
87
+ )
88
+
89
+ if show_details and additional_details:
90
+ prompt_text += (
91
+ f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
92
+ )
93
+
94
+ # Create the payload for the completion request
95
+ messages = [
96
+ {
97
+ "role": "user",
98
+ "content": prompt_text,
99
+ }
100
+ ]
101
+
102
+ # Attachments array containing the image
103
+ attachments = [
104
+ {
105
+ "type": "image",
106
+ "content": base64_image_string,
107
+ "content_type": content_type,
108
+ }
109
+ ]
110
+
111
+ # Make the request to the Hugging Face API
112
+ try:
113
+ # Send the request to the model
114
+ completion = client.chat.completions.create(
115
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
116
+ messages=messages,
117
+ attachments=attachments,
118
+ max_tokens=500
119
+ )
120
+
121
+ # Extract the assistant's response
122
+ assistant_response = completion.choices[0].message['content']
123
+
124
+ # Display the response
125
+ st.markdown(assistant_response)
126
+
127
+ st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
128
+ except Exception as e:
129
+ st.error(f"An error occurred: {e}")
130
  else:
131
  # Warning for user action required
132
  if not uploaded_file and analyze_button: