Niki Zhang committed: Update app.py

Update prompt
Using selected objects as image input

app.py CHANGED
@@ -173,7 +173,7 @@ def chat_input_callback(*args):
 
 
 
-def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None):
+def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None, language="English"):
     if isinstance(image_input, dict):  # if upload from sketcher_input, input contains image and mask
         image_input, mask = image_input['image'], image_input['mask']
 
@@ -207,7 +207,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material = parsed_data["name"], parsed_data["artist"], parsed_data["year"], parsed_data["material"]
     artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
-    paragraph = get_image_gpt(openai_api_key, new_image_path, "
+    paragraph = get_image_gpt(openai_api_key, new_image_path, f"What's going on in this picture? in {language}")
 
     state = [(None, 'Received new image, resize it to width {} and height {}: '.format(image_input.size[0], image_input.size[1]))]
 
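`get_image_gpt` itself does not appear in this diff. Purely as a hedged sketch: assuming it wraps the OpenAI vision chat API and takes a local image path, it might look roughly like the following (the model name and token limit are assumptions, not taken from this commit):

# Hypothetical sketch of get_image_gpt; not from this commit.
import base64
from openai import OpenAI  # assumes the openai>=1.0 client

def get_image_gpt(openai_api_key, image_path, prompt, model="gpt-4o"):
    # Inline the local image as a base64 data URL.
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    client = OpenAI(api_key=openai_api_key)
    response = client.chat.completions.create(
        model=model,  # assumed; the Space may use a different model
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            ],
        }],
        max_tokens=300,
    )
    return response.choices[0].message.content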
@@ -272,15 +272,16 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
 
     generated_caption = text
     print(generated_caption)
+    print("new crop save",new_crop_save_path)
 
-    yield state, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state
+    yield state, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
 
 
 
 
 def submit_caption(image_input, state, generated_caption, text_refiner, visual_chatgpt, enable_wiki, length, sentiment, factuality, language,
                    out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-                   input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key):
+                   input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
     print("state",state)
 
     click_index = click_index_state
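`inference_click` is a generator callback, so every yield must supply exactly one value per component in the event's `outputs=` list; this commit grows the yield and the matching `outputs=` list (see the `create_ui` hunks below) by the same single element. A minimal, self-contained sketch of that contract, with made-up names:

# Sketch: a generator callback yields one value per declared output.
import gradio as gr

def stepper(count):
    # Two outputs are declared below, so each yield carries two values.
    yield count + 1, f"step {count + 1}"

with gr.Blocks() as demo:
    count = gr.State(0)
    label = gr.Textbox()
    gr.Button("Step").click(stepper, inputs=[count], outputs=[count, label])

demo.queue().launch()  # generator callbacks require the queue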
@@ -305,10 +306,10 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     }
 
     prompt_list = [
-
-
-
-
+        'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}, you have to select sentences from wiki caption that describe the surrounding objects that may be associated with the picture object. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.'
+    ]
 
 
     if mapped_value != -1:
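The `{Wiki_caption}`, `{length}`, `{sentiment}`, and `{language}` placeholders imply the selected template is rendered with `str.format`, which matches the `sentiment=controls['sentiment']` / `language=controls['language']` keyword arguments visible in the next hunk. A sketch of that step, with the surrounding variable names assumed:

# Sketch of how the selected template is presumably filled in; names assumed.
prompt = prompt_list[mapped_value].format(
    Wiki_caption=paragraph,            # wiki-style paragraph from upload_callback
    length=controls['length'],         # 'length' key assumed alongside the others
    sentiment=controls['sentiment'],
    language=controls['language'],
)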
@@ -319,14 +320,13 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
             sentiment=controls['sentiment'],
             language=controls['language']
         )
-        prompt+="You should generate a descriptive, coherent and human-like paragraph"
 
     else:
         print("error prompting")
         prompt = "Invalid focus type."
 
     if controls['factuality'] == "Imagination":
-        prompt += "
+        prompt += "Assuming that I am someone who has viewed a lot of art and has a lot of experience viewing art. Explain artistic features (composition, color, style, or use of light) and discuss the symbolism of the content and its influence on later artistic movements"
 
     print("Prompt:", prompt)
     print("click",click_index)
@@ -343,7 +343,9 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
 
 
     if not args.disable_gpt and text_refiner:
-
+        print("new crop save",new_crop_save_path)
+        focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
+
         state = state + [(None, f"Wiki: {paragraph}")]
         state = state + [(None, f"Focus_Caption: {focus_info}")]
         print("new_cap",focus_info)
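Because `new_crop_save_path` is initialized to `gr.State(None)` in the next hunk, `get_image_gpt` would be handed `None` if `submit_caption` runs before any object has been clicked and cropped. A purely illustrative guard, not part of this commit:

# Sketch: guard against the initial None state before any region is cropped.
if not args.disable_gpt and text_refiner:
    if new_crop_save_path is None:
        state = state + [(None, "Please click an object in the image first.")]
    else:
        focus_info = get_image_gpt(openai_api_key, new_crop_save_path, prompt)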
@@ -601,6 +603,7 @@ def create_ui():
         input_mask_state = gr.State(np.zeros((1, 1)))
         input_points_state = gr.State([])
         input_labels_state = gr.State([])
+        new_crop_save_path = gr.State(None)
 
 
 
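This is the `gr.State` pattern the whole commit relies on: one callback writes the crop path into per-session state, a later callback reads it back, with no visible component in between. A generic sketch, not the Space's actual UI:

# Minimal sketch of passing a value between callbacks via gr.State.
import gradio as gr

def save_crop(_path):
    # Stands in for inference_click saving a cropped image and returning its path.
    return "crops/object_0.png"  # hypothetical path

def caption_crop(path):
    # Stands in for submit_caption reading the saved crop path.
    return f"Would caption the crop at: {path}"

with gr.Blocks() as demo:
    crop_path = gr.State(None)  # analogous to new_crop_save_path
    out = gr.Textbox()
    gr.Button("Click object").click(save_crop, inputs=[crop_path], outputs=[crop_path])
    gr.Button("Submit caption").click(caption_crop, inputs=[crop_path], outputs=[out])

demo.launch()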
@@ -874,7 +877,7 @@ def create_ui():
             image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
         ],
-        outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state],
+        outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
         show_progress=False, queue=True
     )
 
@@ -884,7 +887,7 @@ def create_ui():
         inputs=[
             image_input, state, generated_caption, text_refiner, visual_chatgpt, enable_wiki, length, sentiment, factuality, language,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-            input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key
+            input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
         ],
         outputs=[
             chatbot, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,