Update app.py

app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
+import pyttsx3  # Importing pyttsx3 for text-to-speech
 
 # Replace 'your_huggingface_token' with your actual Hugging Face access token
 access_token = os.getenv('token')
@@ -19,6 +20,29 @@ model.eval()  # Set the model to evaluation mode
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
 
+# Initialize the text-to-speech engine
+tts_engine = pyttsx3.init()
+
+# Import required modules for E2-F5-TTS (gradio_client provides Client and handle_file)
+from gradio_client import Client, handle_file
+
+# Initialize the E2-F5-TTS client
+client_tts = Client("mrfakename/E2-F5-TTS")
+
+def text_to_speech(text, sample):
+    result = client_tts.predict(
+        ref_audio_input=handle_file(f'input/{sample}.mp3'),
+        ref_text_input="",
+        gen_text_input=text,
+        remove_silence=False,
+        cross_fade_duration_slider=0.15,
+        speed_slider=1,
+        api_name="/basic_tts"
+    )
+    with open(result[0], "rb") as audio_file:  # result[0] is the path of the generated audio
+        audio_bytes = audio_file.read()
+    return audio_bytes
+
 def conversation_predict(input_text):
     """Generate a response for single-turn input using the model."""
     # Tokenize the input text
@@ -28,7 +52,12 @@ def conversation_predict(input_text):
     outputs = model.generate(input_ids, max_new_tokens=2048)
 
     # Decode and return the generated response
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Convert the text response to speech using E2-F5-TTS
+    audio_bytes = text_to_speech(response, sample="input")
+
+    return response, audio_bytes
 
 def respond(
     message: str,
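For readers who want to exercise the change, below is a minimal usage sketch. It is not part of the commit: the `from app import ...` line, the prompt string, and the out.wav path are illustrative assumptions, and it presumes a reference voice sample exists at input/input.mp3, as text_to_speech expects.

    # Hypothetical usage sketch (not part of the diff above): call the new
    # conversation_predict, which now returns both text and synthesized audio.
    from app import conversation_predict  # assumes this Space's app.py is importable

    response, audio_bytes = conversation_predict("Hello, how are you?")
    print(response)

    # Persist the E2-F5-TTS output, returned as raw audio bytes.
    with open("out.wav", "wb") as f:
        f.write(audio_bytes)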