Futuresony committed on
Commit
efc1876
·
verified ·
1 Parent(s): d25d7d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -3,25 +3,30 @@ from huggingface_hub import InferenceClient
3
  import soundfile as sf
4
  from transformers import pipeline
5
  import torch
 
6
 
7
- # Initialize the client for the text generation model.
8
  client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
9
 
10
- # Initialize the TTS pipeline from Huggingface.
11
  synthesizer = pipeline("text-to-speech", model="Futuresony/output")
12
 
 
 
 
 
13
  def respond(
14
  message,
 
15
  system_message,
16
  max_tokens,
17
  temperature,
18
  top_p,
19
- history=[]
20
  ):
21
- # Prepare the messages for the chatbot.
22
  messages = [{"role": "system", "content": system_message}]
23
 
24
- # Add history of previous conversation.
25
  for val in history:
26
  if val[0]:
27
  messages.append({"role": "user", "content": val[0]})
@@ -32,7 +37,7 @@ def respond(
32
 
33
  response = ""
34
 
35
- # Generate the response from the model.
36
  for message in client.chat_completion(
37
  messages,
38
  max_tokens=max_tokens,
@@ -44,16 +49,16 @@ def respond(
44
  response += token
45
  yield response
46
 
47
- # Convert the generated text to speech.
48
- speech = synthesizer(response)
49
 
50
- # Save the generated speech to a file.
51
  sf.write("generated_speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
52
 
53
- # Return both the text and the audio for playback.
54
  return response, "generated_speech.wav"
55
 
56
- # Create the Gradio interface with a textbox for the user to input a message.
57
  demo = gr.Interface(
58
  fn=respond,
59
  inputs=[
 
3
  import soundfile as sf
4
  from transformers import pipeline
5
  import torch
6
+ from datasets import load_dataset
7
 
8
+ # Initialize the client for the text generation model
9
  client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
10
 
11
+ # Initialize the TTS pipeline from Huggingface
12
  synthesizer = pipeline("text-to-speech", model="Futuresony/output")
13
 
14
+ # Load the speaker embeddings dataset
15
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
16
+ speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
17
+
18
  def respond(
19
  message,
20
+ history: list[tuple[str, str]],
21
  system_message,
22
  max_tokens,
23
  temperature,
24
  top_p,
 
25
  ):
26
+ # Prepare the messages for the chatbot
27
  messages = [{"role": "system", "content": system_message}]
28
 
29
+ # Add history of previous conversation
30
  for val in history:
31
  if val[0]:
32
  messages.append({"role": "user", "content": val[0]})
 
37
 
38
  response = ""
39
 
40
+ # Generate the response from the model
41
  for message in client.chat_completion(
42
  messages,
43
  max_tokens=max_tokens,
 
49
  response += token
50
  yield response
51
 
52
+ # Convert the generated text to speech
53
+ speech = synthesizer(response, forward_params={"speaker_embeddings": speaker_embedding})
54
 
55
+ # Save the generated speech to a file
56
  sf.write("generated_speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
57
 
58
+ # Return both the text and the audio for playback
59
  return response, "generated_speech.wav"
60
 
61
+ # Create the Gradio interface with a textbox for the user to input a message
62
  demo = gr.Interface(
63
  fn=respond,
64
  inputs=[