Spaces:

Futuresony
/

Me

Running

App Files Community

Futuresony committed on Jan 14

Commit

efc1876

verified ·

1 Parent(s): d25d7d6

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -3,25 +3,30 @@ from huggingface_hub import InferenceClient
 import soundfile as sf
 from transformers import pipeline
 import torch
-# Initialize the client for the text generation model.
 client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
-# Initialize the TTS pipeline from Huggingface.
 synthesizer = pipeline("text-to-speech", model="Futuresony/output")
 def respond(
     message,
     system_message,
     max_tokens,
     temperature,
     top_p,
-    history=[]
 ):
-    # Prepare the messages for the chatbot.
     messages = [{"role": "system", "content": system_message}]
-    # Add history of previous conversation.
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
@@ -32,7 +37,7 @@ def respond(
     response = ""
-    # Generate the response from the model.
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -44,16 +49,16 @@ def respond(
         response += token
         yield response
-    # Convert the generated text to speech.
-    speech = synthesizer(response)
-    # Save the generated speech to a file.
     sf.write("generated_speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
-    # Return both the text and the audio for playback.
     return response, "generated_speech.wav"
-# Create the Gradio interface with a textbox for the user to input a message.
 demo = gr.Interface(
     fn=respond,
     inputs=[

 import soundfile as sf
 from transformers import pipeline
 import torch
+from datasets import load_dataset
+# Initialize the client for the text generation model
 client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
+# Initialize the TTS pipeline from Huggingface
 synthesizer = pipeline("text-to-speech", model="Futuresony/output")
+# Load the speaker embeddings dataset
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 def respond(
     message,
+    history: list[tuple[str, str]],
     system_message,
     max_tokens,
     temperature,
     top_p,
 ):
+    # Prepare the messages for the chatbot
     messages = [{"role": "system", "content": system_message}]
+    # Add history of previous conversation
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
     response = ""
+    # Generate the response from the model
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         response += token
         yield response
+    # Convert the generated text to speech
+    speech = synthesizer(response, forward_params={"speaker_embeddings": speaker_embedding})
+    # Save the generated speech to a file
     sf.write("generated_speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
+    # Return both the text and the audio for playback
     return response, "generated_speech.wav"
+# Create the Gradio interface with a textbox for the user to input a message
 demo = gr.Interface(
     fn=respond,
     inputs=[