Spaces:

nvidia
/

audio-flamingo-3

Running on A100

SreyanG-NVIDIA committed on 2 days ago

Commit

e6023f9

verified ·

1 Parent(s): 8ba4304

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -54,7 +54,7 @@ def speech_prompt_infer(audio_prompt_file):
 def think_infer(audio_file, prompt_text):
     try:
         sound = llava.Sound(audio_file)
-        full_prompt = f"<sound>\n{prompt_text}"
         response = model_think.generate_content([sound, full_prompt], generation_config=generation_config_single)
         return response
     except Exception as e:
@@ -183,7 +183,7 @@ with gr.Blocks(css="""
                             ["static/think/audio1.wav", "What are the two people doing in the audio Choose the correct option from the following options:\n(A) One person is demonstrating how to use the equipment\n(B) The two people are discussing how to use the equipment\n(C) The two people are disassembling the equipment\n(D) One person is teaching another person how to use a piece of equipment\nPlease think and reason about the input audio before you respond."],
                             ["static/think/audio2.wav", "Is the boat in the video moving closer or further away? Choose the correct option from the following options:\n(A) Closer\n(B) Further\nPlease think and reason about the input audio before you respond."],
                         ],
-                        inputs=[audio_input_think, prompt_input_think+"\nPlease think and reason about the input music before you respond."],
                         label="🧪 Try Examples"
                     )

 def think_infer(audio_file, prompt_text):
     try:
         sound = llava.Sound(audio_file)
+        full_prompt = f"<sound>\n{prompt_text}\nPlease think and reason about the input music before you respond."
         response = model_think.generate_content([sound, full_prompt], generation_config=generation_config_single)
         return response
     except Exception as e:
                             ["static/think/audio1.wav", "What are the two people doing in the audio Choose the correct option from the following options:\n(A) One person is demonstrating how to use the equipment\n(B) The two people are discussing how to use the equipment\n(C) The two people are disassembling the equipment\n(D) One person is teaching another person how to use a piece of equipment\nPlease think and reason about the input audio before you respond."],
                             ["static/think/audio2.wav", "Is the boat in the video moving closer or further away? Choose the correct option from the following options:\n(A) Closer\n(B) Further\nPlease think and reason about the input audio before you respond."],
                         ],
+                        inputs=[audio_input_think, prompt_input_think],
                         label="🧪 Try Examples"
                     )