whisper_fileStream

Runtime error

mskov committed on Mar 9, 2023

Commit

17ef5ad

1 Parent(s): 1b3e680

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,10 +43,12 @@ def transcribe(audio_file):
     transcription = asr_model.transcribe(audio_file)["text"]
     return transcription
-def inference(audio, prompt, model, temperature):
     # Transcribe with Whisper
     print("The audio is:", audio)
     transcript = transcribe(audio)
     text = prompt + transcript + "\nPrediction: "
@@ -70,12 +72,14 @@ def inference(audio, prompt, model, temperature):
         #print("Infered type is: ", type(infered))
         infers = list(map(lambda x: x.replace("\n", ""), temp))
         #infered = list(map(lambda x: x.split(','), infers))
-    return transcript, infers
 # get audio from microphone
 with gr.Blocks() as face:
     with gr.Row():
         with gr.Column():
             audio = gr.Audio(source="microphone", type="filepath")
@@ -86,8 +90,9 @@ with gr.Blocks() as face:
         with gr.Column():
             script = gr.Textbox(label="text...")
             options = gr.Textbox(label="predictions...")
             #transcribe_btn.click(inference)
-    transcribe_btn.click(fn=inference, inputs=[audio, promptText, dropChoice, sliderChoice], outputs=[script, options])
     examples = gr.Examples(examples=["Sedan, Truck, SUV", "Dalmaion, Shepherd, Lab, Mutt"], inputs=[options])
 face.launch()

     transcription = asr_model.transcribe(audio_file)["text"]
     return transcription
+def inference(audio, prompt, model, temperature, latest):
     # Transcribe with Whisper
     print("The audio is:", audio)
     transcript = transcribe(audio)
+    lastest.append(transcript)
     text = prompt + transcript + "\nPrediction: "
         #print("Infered type is: ", type(infered))
         infers = list(map(lambda x: x.replace("\n", ""), temp))
         #infered = list(map(lambda x: x.split(','), infers))
+        convoState: latest
+    return transcript, infers, convoState
 # get audio from microphone
 with gr.Blocks() as face:
+    convoState = gr.State([])
     with gr.Row():
         with gr.Column():
             audio = gr.Audio(source="microphone", type="filepath")
         with gr.Column():
             script = gr.Textbox(label="text...")
             options = gr.Textbox(label="predictions...")
+            latestConvo = gr.Textbox()
             #transcribe_btn.click(inference)
+    transcribe_btn.click(fn=inference, inputs=[audio, promptText, dropChoice, sliderChoice, convoState], outputs=[script, options, latestConvo])
     examples = gr.Examples(examples=["Sedan, Truck, SUV", "Dalmaion, Shepherd, Lab, Mutt"], inputs=[options])
 face.launch()