awacke1 committed
Commit 1749dd1 · verified · 1 Parent(s): 9e93b97

Create app.py

Files changed (1): app.py +669 -0
app.py ADDED
import streamlit as st
import asyncio
import websockets
import uuid
import argparse
from datetime import datetime
import os
import random
import time
import hashlib
from PIL import Image
import glob
import base64
import io
import streamlit.components.v1 as components
import edge_tts
from audio_recorder_streamlit import audio_recorder
import nest_asyncio
import re
from streamlit_paste_button import paste_image_button
import pytz
import shutil
import anthropic
import openai
from PyPDF2 import PdfReader
import threading
import json
import zipfile
from gradio_client import Client
from dotenv import load_dotenv
from streamlit_marquee import streamlit_marquee

# Patch asyncio so nested event loops work inside Streamlit
nest_asyncio.apply()

# Static config
icons = '🤖🧠🔬📝'
START_ROOM = "Sector 🌌"

# Page setup
st.set_page_config(
    page_title="🤖🧠MMO Chat & Research Brain📝🔬",
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto"
)

# Funky usernames, each mapped to an edge-tts voice
FUN_USERNAMES = {
    "CosmicJester 🌌": "en-US-AriaNeural",
    "PixelPanda 🐼": "en-US-JennyNeural",
    "QuantumQuack 🦆": "en-GB-SoniaNeural",
    "StellarSquirrel 🐿️": "en-AU-NatashaNeural",
    "GizmoGuru ⚙️": "en-CA-ClaraNeural",
    "NebulaNinja 🌠": "en-US-GuyNeural",
    "ByteBuster 💾": "en-GB-RyanNeural",
    "GalacticGopher 🌍": "en-AU-WilliamNeural",
    "RocketRaccoon 🚀": "en-CA-LiamNeural",
    "EchoElf 🧝": "en-US-AnaNeural",
    "PhantomFox 🦊": "en-US-BrandonNeural",
    "WittyWizard 🧙": "en-GB-ThomasNeural",
    "LunarLlama 🌙": "en-AU-FreyaNeural",
    "SolarSloth ☀️": "en-CA-LindaNeural",
    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
    "CyberCoyote 🐺": "en-GB-ElliotNeural",
    "MysticMoose 🦌": "en-AU-JamesNeural",
    "GlitchGnome 🧚": "en-CA-EthanNeural",
    "VortexViper 🐍": "en-US-AmberNeural",
    "ChronoChimp 🐒": "en-GB-LibbyNeural"
}

# Directories
CHAT_DIR = "chat_logs"
VOTE_DIR = "vote_logs"
AUDIO_DIR = "audio_logs"
HISTORY_DIR = "history_logs"
MEDIA_DIR = "media_files"
os.makedirs(CHAT_DIR, exist_ok=True)
os.makedirs(VOTE_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(HISTORY_DIR, exist_ok=True)
os.makedirs(MEDIA_DIR, exist_ok=True)

CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
MEDIA_VOTES_FILE = os.path.join(VOTE_DIR, "media_votes.md")
HISTORY_FILE = os.path.join(HISTORY_DIR, "chat_history.md")

# Unicode keycap digits (digit + U+FE0F variation selector + U+20E3 keycap)
UNICODE_DIGITS = {i: f"{i}\uFE0F\u20E3" for i in range(10)}

# Unicode font styles: map ASCII letters into Mathematical Alphanumeric blocks
UNICODE_FONTS = [
    ("Normal", lambda x: x),
    ("Bold", lambda x: "".join(chr(ord(c) + 0x1D400 - 0x41) if 'A' <= c <= 'Z' else chr(ord(c) + 0x1D41A - 0x61) if 'a' <= c <= 'z' else c for c in x)),
    # Add other font styles similarly...
]
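# A sketch of one more style that could round out the list above, assuming the
# Mathematical Sans-Serif Bold block (capital A at U+1D5D4, lowercase a at
# U+1D5EE, both contiguous); hypothetical, not wired in:
# ("Sans Bold", lambda x: "".join(chr(ord(c) + 0x1D5D4 - 0x41) if 'A' <= c <= 'Z' else chr(ord(c) + 0x1D5EE - 0x61) if 'a' <= c <= 'z' else c for c in x)),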
# Global state
if 'server_running' not in st.session_state:
    st.session_state.server_running = False
if 'server_task' not in st.session_state:
    st.session_state.server_task = None
if 'active_connections' not in st.session_state:
    st.session_state.active_connections = {}
if 'media_notifications' not in st.session_state:
    st.session_state.media_notifications = []
if 'last_chat_update' not in st.session_state:
    st.session_state.last_chat_update = 0
if 'displayed_chat_lines' not in st.session_state:
    st.session_state.displayed_chat_lines = []
if 'message_text' not in st.session_state:
    st.session_state.message_text = ""
if 'audio_cache' not in st.session_state:
    st.session_state.audio_cache = {}
if 'pasted_image_data' not in st.session_state:
    st.session_state.pasted_image_data = None
if 'quote_line' not in st.session_state:
    st.session_state.quote_line = None
if 'refresh_rate' not in st.session_state:
    st.session_state.refresh_rate = 5
if 'base64_cache' not in st.session_state:
    st.session_state.base64_cache = {}
if 'transcript_history' not in st.session_state:
    st.session_state.transcript_history = []
if 'last_transcript' not in st.session_state:
    st.session_state.last_transcript = ""
if 'image_hashes' not in st.session_state:
    st.session_state.image_hashes = set()
if 'tts_voice' not in st.session_state:
    st.session_state.tts_voice = "en-US-AriaNeural"
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# API keys: environment first, then Streamlit secrets when present
load_dotenv()
anthropic_key = os.getenv('ANTHROPIC_API_KEY', "")
openai_api_key = os.getenv('OPENAI_API_KEY', "")
try:
    if 'ANTHROPIC_API_KEY' in st.secrets:
        anthropic_key = st.secrets['ANTHROPIC_API_KEY']
    if 'OPENAI_API_KEY' in st.secrets:
        openai_api_key = st.secrets['OPENAI_API_KEY']
except Exception:
    pass  # no secrets.toml configured; keep the environment values
openai_client = openai.OpenAI(api_key=openai_api_key)

# Timestamp prefix used for chat, audio, and media filenames
def format_timestamp_prefix(username):
    central = pytz.timezone('US/Central')
    now = datetime.now(central)
    return f"{now.strftime('%I-%M-%p-ct-%m-%d-%Y')}-by-{username}"
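# Example (hypothetical clock time): format_timestamp_prefix("PixelPanda 🐼")
# -> "03-42-PM-ct-03-15-2025-by-PixelPanda 🐼"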
# Image hash: accepts a PIL image or raw bytes, returns a short MD5 digest
def compute_image_hash(image_data):
    if isinstance(image_data, Image.Image):
        img_byte_arr = io.BytesIO()
        image_data.save(img_byte_arr, format='PNG')
        img_bytes = img_byte_arr.getvalue()
    else:
        img_bytes = image_data
    return hashlib.md5(img_bytes).hexdigest()[:8]
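# Usage sketch: both input types yield the same 8-hex-char form, e.g.
#   compute_image_hash(Image.new('RGB', (1, 1)))      # PIL image
#   compute_image_hash(open('pic.png', 'rb').read())  # raw bytes
# Note the PIL branch re-encodes to PNG first, so an image object and its
# original file bytes do not necessarily hash identically.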
# Node naming from CLI flags
def get_node_name():
    parser = argparse.ArgumentParser(description='Start a chat node')
    parser.add_argument('--node-name', type=str, default=None)
    parser.add_argument('--port', type=int, default=8501)
    # parse_known_args tolerates any extra flags Streamlit itself receives
    args, _ = parser.parse_known_args()
    return args.node_name or f"node-{uuid.uuid4().hex[:8]}", args.port
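# CLI sketch (hypothetical invocation):
#   streamlit run app.py -- --node-name alpha --port 8502
# The bare `--` keeps Streamlit's own options separate from the script's.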
# Action logger: appends to the history file, de-duplicating repeats within 10s
def log_action(username, action):
    if 'action_log' not in st.session_state:
        st.session_state.action_log = {}
    user_log = st.session_state.action_log.setdefault(username, {})
    current_time = time.time()
    user_log = {k: v for k, v in user_log.items() if current_time - v < 10}
    st.session_state.action_log[username] = user_log
    if action not in user_log:
        central = pytz.timezone('US/Central')
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] {username}: {action}\n")
        user_log[action] = current_time

# Text cleaning for TTS: strip markdown punctuation, collapse whitespace, cap length
def clean_text_for_tts(text):
    cleaned = re.sub(r'[#*!\[\]]+', '', text)
    cleaned = ' '.join(cleaned.split())
    return cleaned[:200] if cleaned else "No text to speak"
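# Example: clean_text_for_tts("## **Big** news! [link](url)")
# -> "Big news link(url)"
# ('#', '*', '!', '[' and ']' dropped, whitespace collapsed, capped at 200 chars)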
# Chat saver: append an entry, synthesize its audio, broadcast to the room
async def save_chat_entry(username, message, is_markdown=False):
    await asyncio.to_thread(log_action, username, "💬🔒 - Chat saver")
    central = pytz.timezone('US/Central')
    timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
    if is_markdown:
        entry = f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
    else:
        entry = f"[{timestamp}] {username}: {message}"
    await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"{entry}\n"))
    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    cleaned_message = clean_text_for_tts(message)
    audio_file = await async_edge_tts_generate(cleaned_message, voice)
    if audio_file:
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{timestamp}] {username}: Audio generated - {audio_file}\n")
    await broadcast_message(f"{username}|{message}", "chat")
    st.session_state.last_chat_update = time.time()
    return audio_file

# Chat loader: create the log on first run, then read it off the event loop
async def load_chat():
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "📜🚀 - Chat loader")
    if not os.path.exists(CHAT_FILE):
        await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n"))
    def read_chat():
        with open(CHAT_FILE, 'r') as f:
            return f.read()
    return await asyncio.to_thread(read_chat)

# Audio generator: text -> timestamped mp3 via edge-tts
async def async_edge_tts_generate(text, voice, rate=0, pitch=0, file_format="mp3"):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🎶🌟 - Audio maker")
    timestamp = format_timestamp_prefix(st.session_state.get('username', 'System 🌟'))
    filename = f"{timestamp}.{file_format}"
    filepath = os.path.join(AUDIO_DIR, filename)
    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
    try:
        await communicate.save(filepath)
        return filepath if os.path.exists(filepath) else None
    except edge_tts.exceptions.NoAudioReceived:
        with open(HISTORY_FILE, 'a') as f:
            central = pytz.timezone('US/Central')
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] Audio failed for '{text}'\n")
        return None

# Audio player with a base64 download link (encoded once per file, then cached)
def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        if file_path not in st.session_state.base64_cache:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            st.session_state.base64_cache[file_path] = b64
        b64 = st.session_state.base64_cache[file_path]
        dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file_path)}">🎵 Download {os.path.basename(file_path)}</a>'
        st.markdown(dl_link, unsafe_allow_html=True)
# Websocket handler: register the client, announce it, relay incoming messages
async def websocket_handler(websocket, path):
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "🌐🔗 - Websocket handler")
    # Assign identifiers before the try block so the finally clause is always safe
    client_id = str(uuid.uuid4())
    room_id = "chat"
    try:
        st.session_state.active_connections.setdefault(room_id, {})[client_id] = websocket
        chat_content = await load_chat()
        username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
        if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
            await save_chat_entry(f"Client-{client_id}", f"{username} has joined {START_ROOM}!")
        async for message in websocket:
            # Wire format is "username|content"; split once so pipes survive in the body
            parts = message.split('|', 1)
            if len(parts) == 2:
                username, content = parts
                await save_chat_entry(username, content)
    except websockets.ConnectionClosed:
        pass
    finally:
        if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
            del st.session_state.active_connections[room_id][client_id]

# Message broadcaster: fan out to every live socket, pruning dead ones
async def broadcast_message(message, room_id):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "📢✈️ - Message broadcaster")
    if room_id in st.session_state.active_connections:
        disconnected = []
        for client_id, ws in st.session_state.active_connections[room_id].items():
            try:
                await ws.send(message)
            except websockets.ConnectionClosed:
                disconnected.append(client_id)
        for client_id in disconnected:
            del st.session_state.active_connections[room_id][client_id]

# Server starter: bind once on 0.0.0.0:8765 and serve until closed
async def run_websocket_server():
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🖥️🌀 - Server starter")
    if not st.session_state.server_running:
        server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
        st.session_state.server_running = True
        await server.wait_closed()
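# A minimal raw-socket probe for manual testing (a sketch, assuming the server
# above is already listening on localhost:8765; not part of the app itself):
#   import asyncio, websockets
#   async def probe():
#       async with websockets.connect("ws://localhost:8765") as ws:
#           await ws.send("CosmicJester 🌌|hello from a raw client")
#   asyncio.run(probe())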
# PDF-to-audio processor with an on-disk cache keyed by md5("text:voice")
class AudioProcessor:
    def __init__(self):
        self.cache_dir = "audio_cache"
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    def _load_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        if os.path.exists(metadata_file):
            with open(metadata_file) as f:
                return json.load(f)
        return {}

    def _save_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        with open(metadata_file, 'w') as f:
            json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")
        if cache_key in self.metadata and os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                return f.read()
        text = text.replace("\n", " ").replace("</s>", " ").strip()
        if not text:
            return None
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)
        self.metadata[cache_key] = {
            'timestamp': datetime.now().isoformat(),
            'text_length': len(text),
            'voice': voice
        }
        self._save_metadata()
        with open(cache_path, 'rb') as f:
            return f.read()
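# Usage sketch (inside an async context):
#   processor = AudioProcessor()
#   mp3_bytes = await processor.create_audio("Hello there", "en-US-AriaNeural")
# A repeat call with the same (text, voice) pair is served from audio_cache/
# without hitting edge-tts again.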
def get_download_link(bin_data, filename, size_mb=None):
    b64 = base64.b64encode(bin_data).decode()
    size_str = f"({size_mb:.1f} MB)" if size_mb else ""
    return f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">📥 {filename} {size_str}</a>'

def process_pdf(pdf_file, max_pages, voice, audio_processor):
    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}
    # Each page is synthesized on its own thread; failures still record a slot
    # so the UI's polling loop cannot stall on a missing key.
    async def process_page(i, text):
        try:
            audios[i] = await audio_processor.create_audio(text, voice)
        except Exception:
            audios[i] = None
    for i in range(total_pages):
        text = reader.pages[i].extract_text()
        texts.append(text)
        # Bind i and text as default args; a bare lambda would capture the loop
        # variables by reference and every thread would see the last page.
        threading.Thread(target=lambda i=i, text=text: asyncio.run(process_page(i, text))).start()
    return texts, audios, total_pages

# AI lookup: ask Claude, optionally enrich with an ArXiv RAG search
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    client = anthropic.Anthropic(api_key=anthropic_key)
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}]
    )
    result = response.content[0].text
    st.markdown("### Claude's reply 🧠:")
    st.markdown(result)
    md_file = create_file(q, result)
    audio_file = speak_with_edge_tts(result, st.session_state.tts_voice)
    play_and_download_audio(audio_file)
    if useArxiv:
        q += result  # fold Claude's answer into the ArXiv query
        gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = gradio_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
        result = f"🔎 {q}\n\n{refs}"
        md_file, audio_file = save_qa_with_audio(q, result)
        play_and_download_audio(audio_file)
        papers = parse_arxiv_refs(refs)
        if papers and useArxivAudio:
            asyncio.run(create_paper_audio_files(papers, q))
        return result, papers
    return result, []

def create_file(prompt, response, file_type="md"):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}_{clean_text_for_filename(prompt[:40] + ' ' + response[:40])}.{file_type}"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename

def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    # Synchronous wrapper; nest_asyncio makes asyncio.run safe inside Streamlit
    return asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))

def save_qa_with_audio(question, answer, voice=None):
    voice = voice or st.session_state.tts_voice
    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(f"{question}\n\nAnswer: {answer}", voice)
    return md_file, audio_file

def clean_text_for_filename(text):
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    return '_'.join(text.split())[:200]
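# Example: clean_text_for_filename("What is RAG? A primer!")
# -> "what_is_rag_a_primer"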
def parse_arxiv_refs(ref_text):
    # Simplified parsing for brevity: one paper record per non-empty line
    return [{"title": line.strip(), "url": "", "authors": "", "summary": "", "full_audio": None, "download_base64": ""} for line in ref_text.split('\n') if line.strip()]
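# Example (hypothetical input): parse_arxiv_refs("Paper A\nPaper B\n")
# -> [{"title": "Paper A", ...}, {"title": "Paper B", ...}]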
async def create_paper_audio_files(papers, input_question):
    # Narrate each paper title and attach an inline download link
    for paper in papers:
        audio_text = f"{paper['title']}"
        audio_file = await async_edge_tts_generate(audio_text, st.session_state.tts_voice)
        paper['full_audio'] = audio_file
        if audio_file:
            with open(audio_file, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            paper['download_base64'] = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(audio_file)}">🎵 Download</a>'
# ASR Component HTML (Fixed Audio Chat)
ASR_HTML = """
<html>
<head>
    <title>Continuous Speech Demo</title>
    <style>
        body { font-family: sans-serif; padding: 20px; max-width: 800px; margin: 0 auto; }
        button { padding: 10px 20px; margin: 10px 5px; font-size: 16px; }
        #status { margin: 10px 0; padding: 10px; background: #e8f5e9; border-radius: 4px; }
        #output { white-space: pre-wrap; padding: 15px; background: #f5f5f5; border-radius: 4px; margin: 10px 0; min-height: 100px; max-height: 400px; overflow-y: auto; }
    </style>
</head>
<body>
    <div>
        <button id="start">Start Listening</button>
        <button id="stop" disabled>Stop Listening</button>
        <button id="clear">Clear Text</button>
    </div>
    <div id="status">Ready</div>
    <div id="output"></div>
    <script>
        if (!('webkitSpeechRecognition' in window)) {
            alert('Speech recognition not supported');
        } else {
            const recognition = new webkitSpeechRecognition();
            const startButton = document.getElementById('start');
            const stopButton = document.getElementById('stop');
            const clearButton = document.getElementById('clear');
            const status = document.getElementById('status');
            const output = document.getElementById('output');
            let fullTranscript = '';
            let lastUpdateTime = Date.now();

            recognition.continuous = true;
            recognition.interimResults = true;

            const startRecognition = () => {
                try {
                    recognition.start();
                    status.textContent = 'Listening...';
                    startButton.disabled = true;
                    stopButton.disabled = false;
                } catch (e) {
                    console.error(e);
                    status.textContent = 'Error: ' + e.message;
                }
            };

            // Auto-start shortly after the page loads
            window.addEventListener('load', () => setTimeout(startRecognition, 1000));

            startButton.onclick = startRecognition;

            stopButton.onclick = () => {
                recognition.stop();
                status.textContent = 'Stopped';
                startButton.disabled = false;
                stopButton.disabled = true;
            };

            clearButton.onclick = () => {
                fullTranscript = '';
                output.textContent = '';
                sendDataToPython({value: '', dataType: "json"});
            };

            recognition.onresult = (event) => {
                let interimTranscript = '';
                let finalTranscript = '';

                for (let i = event.resultIndex; i < event.results.length; i++) {
                    const transcript = event.results[i][0].transcript;
                    if (event.results[i].isFinal) {
                        finalTranscript += transcript + '\\n';
                    } else {
                        interimTranscript += transcript;
                    }
                }

                // Push updates on final results, or at most every 5 seconds
                if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
                    if (finalTranscript) fullTranscript += finalTranscript;
                    lastUpdateTime = Date.now();
                    output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
                    output.scrollTop = output.scrollHeight;
                    sendDataToPython({value: fullTranscript, dataType: "json"});
                }
            };

            recognition.onend = () => {
                // Restart automatically unless the user pressed Stop
                if (!stopButton.disabled) {
                    try {
                        recognition.start();
                        console.log('Restarted recognition');
                    } catch (e) {
                        console.error('Failed to restart:', e);
                        status.textContent = 'Error restarting: ' + e.message;
                        startButton.disabled = false;
                        stopButton.disabled = true;
                    }
                }
            };

            recognition.onerror = (event) => {
                console.error('Recognition error:', event.error);
                status.textContent = 'Error: ' + event.error;
                if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
                    startButton.disabled = false;
                    stopButton.disabled = true;
                }
            };
        }

        function sendDataToPython(data) {
            window.parent.postMessage({
                isStreamlitMessage: true,
                type: "streamlit:setComponentValue",
                ...data
            }, "*");
        }

        window.addEventListener('load', () => {
            window.setTimeout(() => {
                window.parent.postMessage({
                    isStreamlitMessage: true,
                    type: "streamlit:setFrameHeight",
                    height: document.documentElement.clientHeight
                }, "*");
            }, 0);
        });
    </script>
</body>
</html>
"""
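# Note on the bridge above: the component posts messages shaped like
#   {isStreamlitMessage: true, type: "streamlit:setComponentValue",
#    value: <transcript>, dataType: "json"}
# to its parent frame. Plain st.components.v1.html() renders markup but cannot
# return such values to Python; that round trip needs a bidirectional custom
# component, which is why main() below type-checks the component's return
# value before trusting it.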
# Main execution
def main():
    NODE_NAME, port = get_node_name()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    async def async_interface():
        if 'username' not in st.session_state:
            chat_content = await load_chat()
            available_names = [name for name in FUN_USERNAMES if not any(f"{name} has joined" in line for line in chat_content.split('\n'))]
            st.session_state.username = random.choice(available_names) if available_names else random.choice(list(FUN_USERNAMES.keys()))
            st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
            st.markdown(f"**🎙️ Voice**: {st.session_state.tts_voice} 🗣️ for {st.session_state.username}")

        st.title(f"🤖🧠MMO Chat & Research for {st.session_state.username}📝🔬")
        st.markdown(f"Welcome to {START_ROOM} - chat, research, upload, and more! 🎉")

        if not st.session_state.server_task:
            st.session_state.server_task = loop.create_task(run_websocket_server())

        # Tabs
        tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True)
        useArxiv = st.checkbox("Search Arxiv", value=True)
        useArxivAudio = st.checkbox("Generate Arxiv Audio", value=False)

        # Chat & Voice tab
        if tab_main == "🎤 Chat & Voice":
            st.subheader(f"{START_ROOM} Chat 💬")
            chat_content = await load_chat()
            chat_lines = chat_content.split('\n')
            for i, line in enumerate(chat_lines):
                if line.strip() and ': ' in line:
                    st.markdown(line)
                    if st.button("📢 Speak", key=f"speak_{i}"):
                        audio_file = await async_edge_tts_generate(clean_text_for_tts(line.split(': ', 1)[1]), st.session_state.tts_voice)
                        play_and_download_audio(audio_file)

            message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
            if st.button("Send 🚀") and message.strip():
                await save_chat_entry(st.session_state.username, message, is_markdown=True)
                st.rerun()

            st.subheader("🎤 Continuous Speech Input")
            # components.html returns None; the guard below only fires if this is
            # swapped for a bidirectional custom component (see note above)
            asr_component = components.html(ASR_HTML, height=400)
            if asr_component and isinstance(asr_component, dict) and 'value' in asr_component:
                transcript = asr_component['value'].strip()
                if transcript and transcript != st.session_state.last_transcript:
                    await save_chat_entry(st.session_state.username, transcript, is_markdown=True)
                    st.session_state.last_transcript = transcript
                    st.rerun()

        # Media tab with galleries
        elif tab_main == "📸 Media":
            st.header("📸 Media Gallery")
            tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
            with tabs[0]:
                st.subheader("🎵 Audio Files")
                audio_files = glob.glob(f"{MEDIA_DIR}/*.mp3")
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        play_and_download_audio(a)
            with tabs[1]:
                st.subheader("🖼 Images")
                imgs = glob.glob(f"{MEDIA_DIR}/*.png") + glob.glob(f"{MEDIA_DIR}/*.jpg")
                if imgs:
                    cols = st.columns(3)
                    for i, f in enumerate(imgs):
                        with cols[i % 3]:
                            st.image(f, use_container_width=True)
            with tabs[2]:
                st.subheader("🎥 Videos")
                vids = glob.glob(f"{MEDIA_DIR}/*.mp4")
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)

            uploaded_file = st.file_uploader("Upload Media", type=['png', 'jpg', 'mp4', 'mp3'])
            if uploaded_file:
                timestamp = format_timestamp_prefix(st.session_state.username)
                ext = uploaded_file.name.split('.')[-1]
                file_hash = hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]
                filename = f"{timestamp}-{file_hash}.{ext}"
                file_path = os.path.join(MEDIA_DIR, filename)
                with open(file_path, 'wb') as f:
                    f.write(uploaded_file.getbuffer())
                await save_chat_entry(st.session_state.username, f"Uploaded media: {file_path}")
                st.rerun()

        # ArXiv tab
        elif tab_main == "🔍 ArXiv":
            st.subheader("🔍 Query ArXiv")
            q = st.text_input("🔍 Query:")
            if q and st.button("🔍 Run"):
                result, papers = perform_ai_lookup(q, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
                for paper in papers:
                    with st.expander(paper['title']):
                        st.markdown(f"**Summary**: {paper['summary']}")
                        if paper['full_audio']:
                            play_and_download_audio(paper['full_audio'])

        # PDF to Audio tab
        elif tab_main == "📚 PDF to Audio":
            st.subheader("📚 PDF to Audio Converter")
            audio_processor = AudioProcessor()
            uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
            max_pages = st.slider('Pages to process', 1, 100, 10)
            if uploaded_file:
                with st.spinner('Processing PDF...'):
                    texts, audios, total_pages = process_pdf(uploaded_file, max_pages, st.session_state.tts_voice, audio_processor)
                for i, text in enumerate(texts):
                    with st.expander(f"Page {i+1}"):
                        st.markdown(text)
                        # Poll for this page's audio, but give up after 30s so a
                        # failed synthesis thread cannot hang the UI forever
                        deadline = time.time() + 30
                        while i not in audios and time.time() < deadline:
                            time.sleep(0.1)
                        if audios.get(i):
                            st.audio(audios[i], format='audio/mp3')
                            st.markdown(get_download_link(audios[i], f'page_{i+1}.mp3', len(audios[i]) / (1024 * 1024)), unsafe_allow_html=True)

        # Sidebar
        st.sidebar.subheader("Voice Settings")
        new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
        if new_username != st.session_state.username:
            await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
            st.session_state.username = new_username
            st.session_state.tts_voice = FUN_USERNAMES[new_username]
            st.rerun()

    loop.run_until_complete(async_interface())

if __name__ == "__main__":
    main()