Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -150,13 +150,13 @@ def create_demo():
|
|
| 150 |
notes = gr.Markdown(
|
| 151 |
"""
|
| 152 |
# Fish Agent
|
| 153 |
-
1.
|
| 154 |
-
2.
|
| 155 |
-
3.
|
| 156 |
-
#
|
| 157 |
-
1.
|
| 158 |
-
2.
|
| 159 |
-
3.
|
| 160 |
"""
|
| 161 |
)
|
| 162 |
|
|
@@ -169,7 +169,7 @@ def create_demo():
|
|
| 169 |
)
|
| 170 |
sys_text_input = gr.Textbox(
|
| 171 |
label="What is your assistant's role?",
|
| 172 |
-
value='
|
| 173 |
type="text",
|
| 174 |
)
|
| 175 |
audio_input = gr.Audio(
|
|
|
|
| 150 |
notes = gr.Markdown(
|
| 151 |
"""
|
| 152 |
# Fish Agent
|
| 153 |
+
1. This demo showcases the 3B version of Fish Agent, an end-to-end language model developed in-house by Fish Audio.
|
| 154 |
+
2. You can find the code and weights in our official repository, but all related content is released under the CC BY-NC-SA 4.0 license.
|
| 155 |
+
3. The demo is an early beta version, and inference speed is yet to be optimized.
|
| 156 |
+
# Features
|
| 157 |
+
1. This model automatically integrates ASR and TTS components, requiring no external models, making it truly end-to-end rather than a three-stage process (ASR+LLM+TTS).
|
| 158 |
+
2. The model can use reference audio to control speaking voice.
|
| 159 |
+
3. It can generate audio with strong emotions and prosody.
|
| 160 |
"""
|
| 161 |
)
|
| 162 |
|
|
|
|
| 169 |
)
|
| 170 |
sys_text_input = gr.Textbox(
|
| 171 |
label="What is your assistant's role?",
|
| 172 |
+
value='You are a voice assistant created by Fish Audio, offering end-to-end voice interaction for a seamless user experience. You are required to first transcribe the user\'s speech, then answer it in the following format: "Question: [USER_SPEECH]\n\nResponse: [YOUR_RESPONSE]\n". You are required to use the following voice in this conversation.',
|
| 173 |
type="text",
|
| 174 |
)
|
| 175 |
audio_input = gr.Audio(
|