Futuresony committed on
Commit
2071de1
·
verified ·
1 Parent(s): 197cd75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -22
app.py CHANGED
@@ -1,15 +1,15 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import torch
4
  from transformers import pipeline
5
- from datasets import load_dataset
6
-
7
- # Set up your TTS model (as before)
8
- synthesiser = pipeline("text-to-speech", "Futuresony/output")
9
 
10
- # Set up your text generation client
11
  client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
12
 
 
 
 
13
  def respond(
14
  message,
15
  history: list[tuple[str, str]],
@@ -18,18 +18,21 @@ def respond(
18
  temperature,
19
  top_p,
20
  ):
21
- # Generate text response from your model
22
  messages = [{"role": "system", "content": system_message}]
23
-
 
24
  for val in history:
25
  if val[0]:
26
  messages.append({"role": "user", "content": val[0]})
27
  if val[1]:
28
  messages.append({"role": "assistant", "content": val[1]})
29
-
30
  messages.append({"role": "user", "content": message})
31
 
32
  response = ""
 
 
33
  for message in client.chat_completion(
34
  messages,
35
  max_tokens=max_tokens,
@@ -41,27 +44,26 @@ def respond(
41
  response += token
42
  yield response
43
 
44
- # Convert the generated text into speech (Text-to-Speech)
45
- # Get speaker embedding (optional, if you want to control the speaker)
46
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
47
- speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
48
-
49
- # Generate speech from the text response
50
- speech = synthesiser(response, forward_params={"speaker_embeddings": speaker_embedding})
51
-
52
- # Return the audio as a Gradio audio component
53
- return response, speech["audio"]
54
 
55
 
56
  # Create the Gradio interface
57
- demo = gr.ChatInterface(
58
- respond,
59
- additional_inputs=[
60
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
61
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
62
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
63
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
64
  ],
 
65
  )
66
 
67
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import soundfile as sf
4
  from transformers import pipeline
5
+ import torch
 
 
 
6
 
7
+ # Initialize the client for the text generation model
8
  client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
9
 
10
+ # Initialize the TTS pipeline from Huggingface
11
+ synthesizer = pipeline("text-to-speech", "Futuresony/output")
12
+
13
  def respond(
14
  message,
15
  history: list[tuple[str, str]],
 
18
  temperature,
19
  top_p,
20
  ):
21
+ # Prepare the messages for the chatbot
22
  messages = [{"role": "system", "content": system_message}]
23
+
24
+ # Add history of previous conversation
25
  for val in history:
26
  if val[0]:
27
  messages.append({"role": "user", "content": val[0]})
28
  if val[1]:
29
  messages.append({"role": "assistant", "content": val[1]})
30
+
31
  messages.append({"role": "user", "content": message})
32
 
33
  response = ""
34
+
35
+ # Generate the response from the model
36
  for message in client.chat_completion(
37
  messages,
38
  max_tokens=max_tokens,
 
44
  response += token
45
  yield response
46
 
47
+ # Convert the generated text to speech
48
+ speech = synthesizer(response)
49
+
50
+ # Save the generated speech to a file
51
+ sf.write("generated_speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
52
+
53
+ # Return both the text and the audio for playback
54
+ return response, "generated_speech.wav"
 
 
55
 
56
 
57
  # Create the Gradio interface
58
+ demo = gr.Interface(
59
+ fn=respond,
60
+ inputs=[
61
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
62
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
63
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
64
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
65
  ],
66
+ outputs=[gr.Textbox(), gr.Audio()],
67
  )
68
 
69
  if __name__ == "__main__":