Duibonduil committed on
Commit
a1e47c1
·
verified ·
1 Parent(s): f8d6e38

Upload 3 files

Browse files
examples/education/mcp.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "mcpServers": {
3
+ "text_to_audio_local_sse": {
4
+ "url": "http://0.0.0.0:8888/sse"
5
+ }
6
+ }
7
+ }
examples/education/mcp_server_text2audio.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from mcp.server import FastMCP
4
+ from pydantic import Field
5
+
6
+ from aworld.utils import import_package
7
+ from aworld.logs.util import logger
8
+
9
+ # Import required packages
10
+ import_package('gtts', install_name='gTTS')
11
+ import_package('pyttsx3', install_name='pyttsx3')
12
+ import_package('librosa', install_name='librosa')
13
+ import_package('soundfile', install_name='soundfile')
14
+ import pyttsx3
15
+ from gtts import gTTS
16
+ import librosa
17
+ import soundfile as sf
18
+
19
+ mcp = FastMCP("text_to_audio")
20
+
21
@mcp.tool()
def convert_text_to_audio(
    text: str = Field(description="Text to convert to audio"),
    output_file: str = Field(description="Path to the generated audio file")
) -> str:
    """Convert input text to speech with child-friendly settings.

    The text is passed through ``_preprocess_text``, synthesized with gTTS in
    its slow speaking mode, saved to ``output_file`` and finally handed to
    ``_post_process_audio``.

    Args:
        text: Input text to convert.
        output_file: Path the generated audio file is written to.

    Returns:
        str: ``output_file``, the path of the generated audio file.

    Raises:
        Exception: Any error raised while preprocessing, synthesizing or
            saving is logged and then re-raised unchanged.
    """
    # gTTS expects a plain language code. "en" is used instead of the regional
    # "en-US" tag, which gTTS's language check may reject.
    language = "en"

    # Synthesis goes through gTTS only, so no local pyttsx3 engine is created
    # here: nothing in this function would use it, and initializing one can
    # fail on hosts without a speech driver.
    try:
        spoken_text = _preprocess_text(text)

        # slow=True selects gTTS's slower speaking mode.
        tts = gTTS(text=spoken_text, lang=language, slow=True)
        tts.save(output_file)

        # Best-effort clean-up of the saved file; it logs its own failures.
        _post_process_audio(output_file)
        return output_file

    except Exception as e:
        logger.error("Error in text-to-audio conversion: %s", str(e))
        raise
62
+
63
+ def _preprocess_text(text: str) -> str:
64
+ """Preprocess text for child-friendly output.
65
+
66
+ - Add pauses between sentences
67
+ - Emphasize important words
68
+ - Handle special characters
69
+ """
70
+ # Add slight pauses between sentences
71
+ text = text.replace('. ', '... ')
72
+ # Add emphasis on important words (can be customized)
73
+ text = text.replace('!', '! ... ')
74
+ return text
75
+
76
def _post_process_audio(audio_file: str) -> None:
    """Resample and peak-normalize an audio file in place (best effort).

    The file is loaded as 16 kHz mono, scaled so that its largest absolute
    sample is 1.0, and written back to the same path. Failures are logged as
    warnings and are not propagated to the caller.

    Args:
        audio_file: Path of the audio file to rewrite.
    """
    try:
        # Load with a lower sample rate and a single channel.
        y, sr = librosa.load(audio_file, sr=16000, mono=True)

        peak = float(np.max(np.abs(y))) if y.size else 0.0
        if peak == 0.0:
            # Empty or completely silent audio: dividing by the peak would
            # produce NaNs, so the file is left as it is.
            return
        y_norm = y / peak

        # soundfile has no "mp4" format, so that request could never succeed.
        # MP3 matches what gTTS saves; writing it needs a libsndfile build
        # with MP3 support, otherwise the error is caught and logged below.
        sf.write(audio_file, y_norm, sr, format='MP3')
    except (IOError, ValueError, RuntimeError) as e:
        logger.warning("Audio post-processing failed: %s", e)
93
+
94
+ # Main function
95
def _serve_sse() -> None:
    """Start the text_to_audio MCP server over SSE on port 8888."""
    mcp.settings.port = 8888
    mcp.run(transport='sse')


# Script entry point: only start the server when run directly.
if __name__ == "__main__":
    _serve_sse()
98
+
99
+ # text = "Hello, this is a test of the text-to-audio conversion."
100
+ # output_file = "output1.mp4"
101
+ # print(f"Converting text to audio: {text}")
102
+ # audio_file = convert_text_to_audio(text, output_file)
103
+ # print(f"Audio file saved to: {audio_file}")
examples/education/run.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ from aworld.config.conf import AgentConfig
5
+ from aworld.core.agent.base import Agent
6
+ from aworld.runner import Runners
7
+
8
if __name__ == '__main__':
    import os

    # Model settings are read from the environment when present, so a real
    # API key never has to be edited into this file. The fallbacks are the
    # example's placeholder values.
    agent_config = AgentConfig(
        llm_provider="openai",
        llm_model_name=os.getenv("LLM_MODEL_NAME", "gpt-4o"),
        llm_api_key=os.getenv("LLM_API_KEY", "YOUR_API_KEY"),
        llm_base_url=os.getenv("LLM_BASE_URL", "http://localhost:5080"),
    )

    edu_sys_prompt = "You are a helpful agent to convert text to audio for children education."
    edu = Agent(
        conf=agent_config,
        name="edu_agent",
        system_prompt=edu_sys_prompt,
        mcp_servers=["text_to_audio_local_sse"]  # MCP server name for agent to use
    )

    # Run the agent once, synchronously, on a fixed sample request.
    Runners.sync_run(
        input="use text_to_audio_local_sse to convert text to audio: Hello, world!",
        agent=edu
    )