Duibonduil committed on
Commit
89dd41a
·
verified ·
1 Parent(s): 952a3b0

Upload audio_server.py

Browse files
AWorld-main/aworlddistributed/audio_server.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import os
4
+ import traceback
5
+ from typing import List
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+ from openai import OpenAI
9
+ from pydantic import Field
10
+
11
+ from aworld.logs.util import logger
12
+ from mcp_servers.utils import get_file_from_source
13
+
14
# Initialize MCP server
# The tool functions below register themselves on this instance via @mcp.tool.
mcp = FastMCP("audio-server")


# Client used by the transcription tool. Both settings are read from the
# environment once, at import time.
# NOTE(review): main() only calls load_dotenv() after this module has been
# imported, so values that exist solely in a .env file are not visible to
# these os.getenv() calls -- confirm this is intended.
client = OpenAI(
    api_key=os.getenv("AUDIO_LLM_API_KEY"), base_url=os.getenv("AUDIO_LLM_BASE_URL")
)

# Prompt text asking a model to transcribe base64-encoded audio and answer
# with a JSON object holding an "audio_text" field. It is not referenced in
# the visible part of this file.
AUDIO_TRANSCRIBE = (
    "Input is a base64 encoded audio. Transcribe the audio content. "
    "Return a json string with the following format: "
    '{"audio_text": "transcribed text from audio"}'
)
27
+
28
+
29
def encode_audio(audio_source: str, with_header: bool = True) -> str:
    """
    Read an audio source and return its content as a base64 string.

    Args:
        audio_source: URL or local file path of the audio
        with_header: When True, prefix the result with a
            ``data:<mime>;base64,`` header

    Returns:
        str: The base64 text, with the data-URI header if requested

    Raises:
        ValueError: When ``audio_source`` is empty
        Exception: Anything raised while loading or encoding the audio is
            logged and then re-raised unchanged
    """
    if not audio_source:
        raise ValueError("Audio source cannot be empty")

    try:
        # Resolve the source through the shared helper, restricted to audio
        # MIME types and a 200MB size limit.
        local_path, detected_mime, raw_bytes = get_file_from_source(
            audio_source,
            allowed_mime_prefixes=["audio/"],
            max_size_mb=200.0,
            type="audio",
        )

        encoded = base64.b64encode(raw_bytes).decode()
        if with_header:
            encoded = f"data:{detected_mime};base64,{encoded}"

        # A returned path that differs from the caller's own (absolute) path
        # is treated as a temporary download and removed.
        is_callers_file = local_path == os.path.abspath(audio_source)
        if not is_callers_file and os.path.exists(local_path):
            os.unlink(local_path)

        return encoded

    except Exception:
        logger.error(
            f"Error encoding audio from {audio_source}: {traceback.format_exc()}"
        )
        raise
75
+
76
+
77
def _cleanup_temp_audio(file_path: str, source: str) -> None:
    """Best-effort removal of a temporary file created for ``source``."""
    # A path equal to the caller's own absolute path is the caller's file and
    # must be kept; anything else is treated as a temporary download.
    # NOTE(review): this assumes get_file_from_source returns an absolute path
    # for local inputs -- confirm against the helper, otherwise a local file
    # given by relative path would be deleted here.
    try:
        if file_path != os.path.abspath(source) and os.path.exists(file_path):
            os.unlink(file_path)
    except OSError:
        # Failing to delete a temp file must not turn a successful
        # transcription into an error entry.
        logger.error(
            f"Error removing temporary file {file_path}: {traceback.format_exc()}"
        )


@mcp.tool(description="Transcribe the given audio in a list of filepaths or urls.")
async def mcp_transcribe_audio(
    audio_urls: List[str] = Field(
        description="The input audio in given a list of filepaths or urls."
    ),
) -> str:
    """
    Transcribe the given audio in a list of filepaths or urls.

    Each input yields exactly one entry in the result, in input order: the
    transcription on success, or an ``"Error: ..."`` string on failure.
    A failure for one input does not stop the remaining inputs.

    Args:
        audio_urls: List of audio file paths or URLs

    Returns:
        str: JSON-encoded list of transcriptions / error strings
    """
    transcriptions = []
    for audio_url in audio_urls:
        try:
            # Get file with validation (only audio files allowed)
            file_path, _, _ = get_file_from_source(
                audio_url,
                allowed_mime_prefixes=["audio/"],
                max_size_mb=200.0,  # 200MB limit for audio files
                type="audio",  # Specify type as audio to handle audio files
            )

            try:
                # NOTE: this is a synchronous client call inside an async
                # function, so the event loop is blocked while it runs.
                with open(file_path, "rb") as audio_file:
                    transcription = client.audio.transcriptions.create(
                        file=audio_file,
                        model=os.getenv("AUDIO_LLM_MODEL_NAME"),
                        response_format="text",
                    )
            finally:
                # Runs on success and on failure, so a download is not left
                # behind when opening or transcribing raises.
                _cleanup_temp_audio(file_path, audio_url)

            transcriptions.append(transcription)

        except Exception as e:
            logger.error(f"Error transcribing {audio_url}: {traceback.format_exc()}")
            transcriptions.append(f"Error: {str(e)}")

    logger.info(f"---get_text_by_transcribe-transcription:{transcriptions}")
    return json.dumps(transcriptions, ensure_ascii=False)
122
+
123
+
124
def main():
    """
    Load environment variables from a .env file and run the MCP server.

    The server is run with the stdio transport, so the startup message goes
    to stderr instead of stdout.

    Note: the module-level OpenAI client is constructed at import time,
    before load_dotenv() runs here. Only values read later (such as
    AUDIO_LLM_MODEL_NAME, read per request) pick up entries that exist
    solely in the .env file.
    """
    # Imported locally so this function does not rely on the module-level
    # `import sys` that appears further down the file.
    import sys

    from dotenv import load_dotenv

    load_dotenv()

    print("Starting Audio MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")
131
+
132
+
133
+ # Make the module callable
134
def __call__():
    """
    Entry-point shim intended for uvx: delegates straight to main().

    Takes no arguments and returns nothing.
    """
    main()
140
+
141
+
142
# Add this for compatibility with uvx
# Attaches the __call__ function above as an attribute on this module object.
# NOTE(review): Python looks special methods up on the type, not the instance,
# so this attribute alone does not make `module()` work; it only helps callers
# that fetch `module.__call__` explicitly -- confirm what uvx actually needs.
import sys

sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()