ProfessorLeVesseur committed on
Commit 7f1c702 · verified · 1 Parent(s): 2418a7f

Update app.py

Files changed (1)
  1. app.py +1 -129
app.py CHANGED
@@ -1,131 +1,3 @@
-# import streamlit as st
-# import requests
-# from PIL import Image
-# import io
-
-# # Streamlit page setup
-# st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
-
-# # Add the logo image with a specified width
-# image_width = 300 # Set the desired width in pixels
-# st.image('MTSS.ai_Logo.png', width=image_width)
-
-# st.header('VisionTexts™ | Accessibility')
-# st.subheader('Image Alt Text Creator')
-
-# # Retrieve the Hugging Face API Key from secrets
-# huggingface_api_key = st.secrets["huggingface_api_key"]
-
-# # API endpoints
-# # API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
-# API_URL_CAPTION = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
-# API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
-
-# headers = {
-#     "Authorization": f"Bearer {huggingface_api_key}",
-#     "Content-Type": "application/json"
-# }
-
-# # File uploader allows user to add their own image
-# uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
-
-# if uploaded_file:
-#     # Display the uploaded image
-#     image = Image.open(uploaded_file).convert('RGB')
-#     image_width = 200 # Set the desired width in pixels
-#     with st.expander("Image", expanded=True):
-#         st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
-# else:
-#     st.warning("Please upload an image.")
-
-# # Option for adding additional details
-# show_details = st.checkbox("Add additional details about the image.", value=False)
-
-# if show_details:
-#     # Text input for additional details about the image
-#     additional_details = st.text_area(
-#         "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
-#     )
-# else:
-#     additional_details = ""
-
-# # Button to trigger the analysis
-# analyze_button = st.button("Analyze the Image", type="secondary")
-
-# # Prompt for complex image description
-# complex_image_prompt_text = (
-#     "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
-#     "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
-#     "Skip phrases like 'image of' or 'picture of.' "
-#     "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
-# )
-
-# # Functions to query the Hugging Face Inference API
-# def query_image_caption(image):
-#     # Convert PIL image to bytes
-#     buffered = io.BytesIO()
-#     image.save(buffered, format="JPEG")
-#     image_bytes = buffered.getvalue()
-
-#     response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
-#     return response.json()
-
-# def query_llm(prompt):
-#     payload = {
-#         "inputs": prompt,
-#         "parameters": {
-#             "max_new_tokens": 500,
-#             "return_full_text": False,
-#             "do_sample": True,
-#             "temperature": 0.7,
-#             "top_p": 0.9
-#         },
-#         "options": {
-#             "wait_for_model": True
-#         }
-#     }
-
-#     response = requests.post(API_URL_LLM, headers=headers, json=payload)
-#     return response.json()
-
-# # Check if an image has been uploaded and if the button has been pressed
-# if uploaded_file is not None and analyze_button:
-#     with st.spinner("Analyzing the image..."):
-#         # Get the caption from the image using the image captioning API
-#         caption_response = query_image_caption(image)
-
-#         # Handle potential errors from the API
-#         if isinstance(caption_response, dict) and caption_response.get("error"):
-#             st.error(f"Error with image captioning model: {caption_response['error']}")
-#         else:
-#             image_caption = caption_response[0]['generated_text']
-
-#             # Use the complex image prompt text
-#             prompt_text = complex_image_prompt_text
-
-#             # Include additional details if provided
-#             if additional_details:
-#                 prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
-
-#             # Create the prompt for the language model
-#             full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
-
-#             # Use the language model to generate the alt text description
-#             llm_response = query_llm(full_prompt)
-
-#             # Handle potential errors from the API
-#             if isinstance(llm_response, dict) and llm_response.get("error"):
-#                 st.error(f"Error with language model: {llm_response['error']}")
-#             else:
-#                 generated_text = llm_response[0]['generated_text'].strip()
-#                 st.markdown("### Generated Alt Text:")
-#                 st.write(generated_text)
-
-#                 st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
-# else:
-#     st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
-
-
 import streamlit as st
 import requests
 from PIL import Image
@@ -193,7 +65,7 @@ def query_image_caption(image):
     # Use the InferenceClient's image_to_text method
     response = client.image_to_text(
         # model="Salesforce/blip-image-captioning-large",
-        model="microsoft/Florence-2-base-ft",
+        model="nlpconnect/vit-gpt2-image-captioning",
        image=image_bytes,
     )
     return response
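
Below is a minimal, self-contained sketch (not part of this commit) of how the updated captioning call might be exercised with huggingface_hub's InferenceClient, assuming a Hugging Face token is already configured (for example via HF_TOKEN or st.secrets); the caption_image helper and the file-path argument are illustrative only.

import io

from huggingface_hub import InferenceClient
from PIL import Image

client = InferenceClient()  # assumes a token is available, e.g. via HF_TOKEN

def caption_image(path: str) -> str:
    # Convert the image to JPEG bytes, mirroring what the app does before calling the API.
    buffered = io.BytesIO()
    Image.open(path).convert("RGB").save(buffered, format="JPEG")
    # Same model this commit switches to.
    response = client.image_to_text(
        image=buffered.getvalue(),
        model="nlpconnect/vit-gpt2-image-captioning",
    )
    # Depending on the huggingface_hub version, the result is either a plain string
    # or an object exposing .generated_text.
    return getattr(response, "generated_text", response)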