awacke1 committed
Commit 1749dd1 · verified · 1 Parent(s): 9e93b97

Create app.py

Files changed (1): app.py +669 -0
app.py ADDED
import streamlit as st
import asyncio
import websockets
import uuid
import argparse
from datetime import datetime
import os
import random
import time
import hashlib
from PIL import Image
import glob
import base64
import io
import streamlit.components.v1 as components
import edge_tts
from audio_recorder_streamlit import audio_recorder
import nest_asyncio
import re
from streamlit_paste_button import paste_image_button
import pytz
import shutil
import anthropic
import openai
from PyPDF2 import PdfReader
import threading
import json
import zipfile
from gradio_client import Client
from dotenv import load_dotenv
from streamlit_marquee import streamlit_marquee

# Patch asyncio so nested event loops work inside Streamlit
nest_asyncio.apply()

# Static config
icons = '🤖🧠🔬📝'
START_ROOM = "Sector 🌌"

# Page setup
st.set_page_config(
    page_title="🤖🧠MMO Chat & Research Brain📝🔬",
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto"
)

# Funky usernames, each mapped to an edge-tts voice
FUN_USERNAMES = {
    "CosmicJester 🌌": "en-US-AriaNeural",
    "PixelPanda 🐼": "en-US-JennyNeural",
    "QuantumQuack 🦆": "en-GB-SoniaNeural",
    "StellarSquirrel 🐿️": "en-AU-NatashaNeural",
    "GizmoGuru ⚙️": "en-CA-ClaraNeural",
    "NebulaNinja 🌠": "en-US-GuyNeural",
    "ByteBuster 💾": "en-GB-RyanNeural",
    "GalacticGopher 🌍": "en-AU-WilliamNeural",
    "RocketRaccoon 🚀": "en-CA-LiamNeural",
    "EchoElf 🧝": "en-US-AnaNeural",
    "PhantomFox 🦊": "en-US-BrandonNeural",
    "WittyWizard 🧙": "en-GB-ThomasNeural",
    "LunarLlama 🌙": "en-AU-FreyaNeural",
    "SolarSloth ☀️": "en-CA-LindaNeural",
    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
    "CyberCoyote 🐺": "en-GB-ElliotNeural",
    "MysticMoose 🦌": "en-AU-JamesNeural",
    "GlitchGnome 🧚": "en-CA-EthanNeural",
    "VortexViper 🐍": "en-US-AmberNeural",
    "ChronoChimp 🐒": "en-GB-LibbyNeural"
}

# Directories
CHAT_DIR = "chat_logs"
VOTE_DIR = "vote_logs"
AUDIO_DIR = "audio_logs"
HISTORY_DIR = "history_logs"
MEDIA_DIR = "media_files"
os.makedirs(CHAT_DIR, exist_ok=True)
os.makedirs(VOTE_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(HISTORY_DIR, exist_ok=True)
os.makedirs(MEDIA_DIR, exist_ok=True)

CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
MEDIA_VOTES_FILE = os.path.join(VOTE_DIR, "media_votes.md")
HISTORY_FILE = os.path.join(HISTORY_DIR, "chat_history.md")

# Unicode keycap digits (digit + U+FE0F variation selector + U+20E3 keycap)
UNICODE_DIGITS = {i: f"{i}\uFE0F\u20E3" for i in range(10)}

# Unicode font styles: map ASCII letters into Mathematical Alphanumeric blocks
UNICODE_FONTS = [
    ("Normal", lambda x: x),
    ("Bold", lambda x: "".join(chr(ord(c) + 0x1D400 - 0x41) if 'A' <= c <= 'Z' else chr(ord(c) + 0x1D41A - 0x61) if 'a' <= c <= 'z' else c for c in x)),
    # Add other font styles similarly...
]
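# A sketch of one more style that could round out the list above, assuming the
# Mathematical Sans-Serif Bold block (capital A at U+1D5D4, lowercase a at
# U+1D5EE, both contiguous); hypothetical, not wired in:
# ("Sans Bold", lambda x: "".join(chr(ord(c) + 0x1D5D4 - 0x41) if 'A' <= c <= 'Z' else chr(ord(c) + 0x1D5EE - 0x61) if 'a' <= c <= 'z' else c for c in x)),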
# Global state
if 'server_running' not in st.session_state:
    st.session_state.server_running = False
if 'server_task' not in st.session_state:
    st.session_state.server_task = None
if 'active_connections' not in st.session_state:
    st.session_state.active_connections = {}
if 'media_notifications' not in st.session_state:
    st.session_state.media_notifications = []
if 'last_chat_update' not in st.session_state:
    st.session_state.last_chat_update = 0
if 'displayed_chat_lines' not in st.session_state:
    st.session_state.displayed_chat_lines = []
if 'message_text' not in st.session_state:
    st.session_state.message_text = ""
if 'audio_cache' not in st.session_state:
    st.session_state.audio_cache = {}
if 'pasted_image_data' not in st.session_state:
    st.session_state.pasted_image_data = None
if 'quote_line' not in st.session_state:
    st.session_state.quote_line = None
if 'refresh_rate' not in st.session_state:
    st.session_state.refresh_rate = 5
if 'base64_cache' not in st.session_state:
    st.session_state.base64_cache = {}
if 'transcript_history' not in st.session_state:
    st.session_state.transcript_history = []
if 'last_transcript' not in st.session_state:
    st.session_state.last_transcript = ""
if 'image_hashes' not in st.session_state:
    st.session_state.image_hashes = set()
if 'tts_voice' not in st.session_state:
    st.session_state.tts_voice = "en-US-AriaNeural"
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# API keys: environment first, then Streamlit secrets when present
load_dotenv()
anthropic_key = os.getenv('ANTHROPIC_API_KEY', "")
openai_api_key = os.getenv('OPENAI_API_KEY', "")
try:
    if 'ANTHROPIC_API_KEY' in st.secrets:
        anthropic_key = st.secrets['ANTHROPIC_API_KEY']
    if 'OPENAI_API_KEY' in st.secrets:
        openai_api_key = st.secrets['OPENAI_API_KEY']
except Exception:
    pass  # no secrets.toml configured; keep the environment values
openai_client = openai.OpenAI(api_key=openai_api_key)

# Timestamp prefix used for chat, audio, and media filenames
def format_timestamp_prefix(username):
    central = pytz.timezone('US/Central')
    now = datetime.now(central)
    return f"{now.strftime('%I-%M-%p-ct-%m-%d-%Y')}-by-{username}"
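# Example (hypothetical clock time): format_timestamp_prefix("PixelPanda 🐼")
# -> "03-42-PM-ct-03-15-2025-by-PixelPanda 🐼"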
# Image hash: accepts a PIL image or raw bytes, returns a short MD5 digest
def compute_image_hash(image_data):
    if isinstance(image_data, Image.Image):
        img_byte_arr = io.BytesIO()
        image_data.save(img_byte_arr, format='PNG')
        img_bytes = img_byte_arr.getvalue()
    else:
        img_bytes = image_data
    return hashlib.md5(img_bytes).hexdigest()[:8]
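# Usage sketch: both input types yield the same 8-hex-char form, e.g.
#   compute_image_hash(Image.new('RGB', (1, 1)))      # PIL image
#   compute_image_hash(open('pic.png', 'rb').read())  # raw bytes
# Note the PIL branch re-encodes to PNG first, so an image object and its
# original file bytes do not necessarily hash identically.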
# Node naming from CLI flags
def get_node_name():
    parser = argparse.ArgumentParser(description='Start a chat node')
    parser.add_argument('--node-name', type=str, default=None)
    parser.add_argument('--port', type=int, default=8501)
    # parse_known_args tolerates any extra flags Streamlit itself receives
    args, _ = parser.parse_known_args()
    return args.node_name or f"node-{uuid.uuid4().hex[:8]}", args.port
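# CLI sketch (hypothetical invocation):
#   streamlit run app.py -- --node-name alpha --port 8502
# The bare `--` keeps Streamlit's own options separate from the script's.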
# Action logger: appends to the history file, de-duplicating repeats within 10s
def log_action(username, action):
    if 'action_log' not in st.session_state:
        st.session_state.action_log = {}
    user_log = st.session_state.action_log.setdefault(username, {})
    current_time = time.time()
    user_log = {k: v for k, v in user_log.items() if current_time - v < 10}
    st.session_state.action_log[username] = user_log
    if action not in user_log:
        central = pytz.timezone('US/Central')
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] {username}: {action}\n")
        user_log[action] = current_time

# Text cleaning for TTS: strip markdown punctuation, collapse whitespace, cap length
def clean_text_for_tts(text):
    cleaned = re.sub(r'[#*!\[\]]+', '', text)
    cleaned = ' '.join(cleaned.split())
    return cleaned[:200] if cleaned else "No text to speak"
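# Example: clean_text_for_tts("## **Big** news! [link](url)")
# -> "Big news link(url)"
# ('#', '*', '!', '[' and ']' dropped, whitespace collapsed, capped at 200 chars)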
# Chat saver: append an entry, synthesize its audio, broadcast to the room
async def save_chat_entry(username, message, is_markdown=False):
    await asyncio.to_thread(log_action, username, "💬🔒 - Chat saver")
    central = pytz.timezone('US/Central')
    timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
    if is_markdown:
        entry = f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
    else:
        entry = f"[{timestamp}] {username}: {message}"
    await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"{entry}\n"))
    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    cleaned_message = clean_text_for_tts(message)
    audio_file = await async_edge_tts_generate(cleaned_message, voice)
    if audio_file:
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{timestamp}] {username}: Audio generated - {audio_file}\n")
    await broadcast_message(f"{username}|{message}", "chat")
    st.session_state.last_chat_update = time.time()
    return audio_file

# Chat loader: create the log on first run, then read it off the event loop
async def load_chat():
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "📜🚀 - Chat loader")
    if not os.path.exists(CHAT_FILE):
        await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n"))
    def read_chat():
        with open(CHAT_FILE, 'r') as f:
            return f.read()
    return await asyncio.to_thread(read_chat)

# Audio generator: text -> timestamped mp3 via edge-tts
async def async_edge_tts_generate(text, voice, rate=0, pitch=0, file_format="mp3"):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🎶🌟 - Audio maker")
    timestamp = format_timestamp_prefix(st.session_state.get('username', 'System 🌟'))
    filename = f"{timestamp}.{file_format}"
    filepath = os.path.join(AUDIO_DIR, filename)
    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
    try:
        await communicate.save(filepath)
        return filepath if os.path.exists(filepath) else None
    except edge_tts.exceptions.NoAudioReceived:
        with open(HISTORY_FILE, 'a') as f:
            central = pytz.timezone('US/Central')
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] Audio failed for '{text}'\n")
        return None

# Audio player with a base64 download link (encoded once per file, then cached)
def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        if file_path not in st.session_state.base64_cache:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            st.session_state.base64_cache[file_path] = b64
        b64 = st.session_state.base64_cache[file_path]
        dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file_path)}">🎵 Download {os.path.basename(file_path)}</a>'
        st.markdown(dl_link, unsafe_allow_html=True)
# Websocket handler: register the client, announce it, relay incoming messages
async def websocket_handler(websocket, path):
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "🌐🔗 - Websocket handler")
    # Assign identifiers before the try block so the finally clause is always safe
    client_id = str(uuid.uuid4())
    room_id = "chat"
    try:
        st.session_state.active_connections.setdefault(room_id, {})[client_id] = websocket
        chat_content = await load_chat()
        username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
        if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
            await save_chat_entry(f"Client-{client_id}", f"{username} has joined {START_ROOM}!")
        async for message in websocket:
            # Wire format is "username|content"; split once so pipes survive in the body
            parts = message.split('|', 1)
            if len(parts) == 2:
                username, content = parts
                await save_chat_entry(username, content)
    except websockets.ConnectionClosed:
        pass
    finally:
        if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
            del st.session_state.active_connections[room_id][client_id]

# Message broadcaster: fan out to every live socket, pruning dead ones
async def broadcast_message(message, room_id):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "📢✈️ - Message broadcaster")
    if room_id in st.session_state.active_connections:
        disconnected = []
        for client_id, ws in st.session_state.active_connections[room_id].items():
            try:
                await ws.send(message)
            except websockets.ConnectionClosed:
                disconnected.append(client_id)
        for client_id in disconnected:
            del st.session_state.active_connections[room_id][client_id]

# Server starter: bind once on 0.0.0.0:8765 and serve until closed
async def run_websocket_server():
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🖥️🌀 - Server starter")
    if not st.session_state.server_running:
        server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
        st.session_state.server_running = True
        await server.wait_closed()
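# A minimal raw-socket probe for manual testing (a sketch, assuming the server
# above is already listening on localhost:8765; not part of the app itself):
#   import asyncio, websockets
#   async def probe():
#       async with websockets.connect("ws://localhost:8765") as ws:
#           await ws.send("CosmicJester 🌌|hello from a raw client")
#   asyncio.run(probe())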
# PDF-to-audio processor with an on-disk cache keyed by md5("text:voice")
class AudioProcessor:
    def __init__(self):
        self.cache_dir = "audio_cache"
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    def _load_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        if os.path.exists(metadata_file):
            with open(metadata_file) as f:
                return json.load(f)
        return {}

    def _save_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        with open(metadata_file, 'w') as f:
            json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")
        if cache_key in self.metadata and os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                return f.read()
        text = text.replace("\n", " ").replace("</s>", " ").strip()
        if not text:
            return None
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)
        self.metadata[cache_key] = {
            'timestamp': datetime.now().isoformat(),
            'text_length': len(text),
            'voice': voice
        }
        self._save_metadata()
        with open(cache_path, 'rb') as f:
            return f.read()
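# Usage sketch (inside an async context):
#   processor = AudioProcessor()
#   mp3_bytes = await processor.create_audio("Hello there", "en-US-AriaNeural")
# A repeat call with the same (text, voice) pair is served from audio_cache/
# without hitting edge-tts again.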
def get_download_link(bin_data, filename, size_mb=None):
    b64 = base64.b64encode(bin_data).decode()
    size_str = f"({size_mb:.1f} MB)" if size_mb else ""
    return f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">📥 {filename} {size_str}</a>'

def process_pdf(pdf_file, max_pages, voice, audio_processor):
    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}
    # Each page is synthesized on its own thread; failures still record a slot
    # so the UI's polling loop cannot stall on a missing key.
    async def process_page(i, text):
        try:
            audios[i] = await audio_processor.create_audio(text, voice)
        except Exception:
            audios[i] = None
    for i in range(total_pages):
        text = reader.pages[i].extract_text()
        texts.append(text)
        # Bind i and text as default args; a bare lambda would capture the loop
        # variables by reference and every thread would see the last page.
        threading.Thread(target=lambda i=i, text=text: asyncio.run(process_page(i, text))).start()
    return texts, audios, total_pages

# AI lookup: ask Claude, optionally enrich with an ArXiv RAG search
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    client = anthropic.Anthropic(api_key=anthropic_key)
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}]
    )
    result = response.content[0].text
    st.markdown("### Claude's reply 🧠:")
    st.markdown(result)
    md_file = create_file(q, result)
    audio_file = speak_with_edge_tts(result, st.session_state.tts_voice)
    play_and_download_audio(audio_file)
    if useArxiv:
        q += result  # fold Claude's answer into the ArXiv query
        gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = gradio_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
        result = f"🔎 {q}\n\n{refs}"
        md_file, audio_file = save_qa_with_audio(q, result)
        play_and_download_audio(audio_file)
        papers = parse_arxiv_refs(refs)
        if papers and useArxivAudio:
            asyncio.run(create_paper_audio_files(papers, q))
        return result, papers
    return result, []

def create_file(prompt, response, file_type="md"):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}_{clean_text_for_filename(prompt[:40] + ' ' + response[:40])}.{file_type}"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename

def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    # Synchronous wrapper; nest_asyncio makes asyncio.run safe inside Streamlit
    return asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))

def save_qa_with_audio(question, answer, voice=None):
    voice = voice or st.session_state.tts_voice
    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(f"{question}\n\nAnswer: {answer}", voice)
    return md_file, audio_file

def clean_text_for_filename(text):
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    return '_'.join(text.split())[:200]
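# Example: clean_text_for_filename("What is RAG? A primer!")
# -> "what_is_rag_a_primer"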
def parse_arxiv_refs(ref_text):
    # Simplified parsing for brevity: one paper record per non-empty line
    return [{"title": line.strip(), "url": "", "authors": "", "summary": "", "full_audio": None, "download_base64": ""} for line in ref_text.split('\n') if line.strip()]
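# Example (hypothetical input): parse_arxiv_refs("Paper A\nPaper B\n")
# -> [{"title": "Paper A", ...}, {"title": "Paper B", ...}]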
async def create_paper_audio_files(papers, input_question):
    # Narrate each paper title and attach an inline download link
    for paper in papers:
        audio_text = f"{paper['title']}"
        audio_file = await async_edge_tts_generate(audio_text, st.session_state.tts_voice)
        paper['full_audio'] = audio_file
        if audio_file:
            with open(audio_file, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            paper['download_base64'] = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(audio_file)}">🎵 Download</a>'
# ASR Component HTML (Fixed Audio Chat)
ASR_HTML = """
<html>
<head>
    <title>Continuous Speech Demo</title>
    <style>
        body { font-family: sans-serif; padding: 20px; max-width: 800px; margin: 0 auto; }
        button { padding: 10px 20px; margin: 10px 5px; font-size: 16px; }
        #status { margin: 10px 0; padding: 10px; background: #e8f5e9; border-radius: 4px; }
        #output { white-space: pre-wrap; padding: 15px; background: #f5f5f5; border-radius: 4px; margin: 10px 0; min-height: 100px; max-height: 400px; overflow-y: auto; }
    </style>
</head>
<body>
    <div>
        <button id="start">Start Listening</button>
        <button id="stop" disabled>Stop Listening</button>
        <button id="clear">Clear Text</button>
    </div>
    <div id="status">Ready</div>
    <div id="output"></div>
    <script>
        if (!('webkitSpeechRecognition' in window)) {
            alert('Speech recognition not supported');
        } else {
            const recognition = new webkitSpeechRecognition();
            const startButton = document.getElementById('start');
            const stopButton = document.getElementById('stop');
            const clearButton = document.getElementById('clear');
            const status = document.getElementById('status');
            const output = document.getElementById('output');
            let fullTranscript = '';
            let lastUpdateTime = Date.now();

            recognition.continuous = true;
            recognition.interimResults = true;

            const startRecognition = () => {
                try {
                    recognition.start();
                    status.textContent = 'Listening...';
                    startButton.disabled = true;
                    stopButton.disabled = false;
                } catch (e) {
                    console.error(e);
                    status.textContent = 'Error: ' + e.message;
                }
            };

            // Auto-start shortly after the page loads
            window.addEventListener('load', () => setTimeout(startRecognition, 1000));

            startButton.onclick = startRecognition;

            stopButton.onclick = () => {
                recognition.stop();
                status.textContent = 'Stopped';
                startButton.disabled = false;
                stopButton.disabled = true;
            };

            clearButton.onclick = () => {
                fullTranscript = '';
                output.textContent = '';
                sendDataToPython({value: '', dataType: "json"});
            };

            recognition.onresult = (event) => {
                let interimTranscript = '';
                let finalTranscript = '';

                for (let i = event.resultIndex; i < event.results.length; i++) {
                    const transcript = event.results[i][0].transcript;
                    if (event.results[i].isFinal) {
                        finalTranscript += transcript + '\\n';
                    } else {
                        interimTranscript += transcript;
                    }
                }

                // Push updates on final results, or at most every 5 seconds
                if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
                    if (finalTranscript) fullTranscript += finalTranscript;
                    lastUpdateTime = Date.now();
                    output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
                    output.scrollTop = output.scrollHeight;
                    sendDataToPython({value: fullTranscript, dataType: "json"});
                }
            };

            recognition.onend = () => {
                // Restart automatically unless the user pressed Stop
                if (!stopButton.disabled) {
                    try {
                        recognition.start();
                        console.log('Restarted recognition');
                    } catch (e) {
                        console.error('Failed to restart:', e);
                        status.textContent = 'Error restarting: ' + e.message;
                        startButton.disabled = false;
                        stopButton.disabled = true;
                    }
                }
            };

            recognition.onerror = (event) => {
                console.error('Recognition error:', event.error);
                status.textContent = 'Error: ' + event.error;
                if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
                    startButton.disabled = false;
                    stopButton.disabled = true;
                }
            };
        }

        function sendDataToPython(data) {
            window.parent.postMessage({
                isStreamlitMessage: true,
                type: "streamlit:setComponentValue",
                ...data
            }, "*");
        }

        window.addEventListener('load', () => {
            window.setTimeout(() => {
                window.parent.postMessage({
                    isStreamlitMessage: true,
                    type: "streamlit:setFrameHeight",
                    height: document.documentElement.clientHeight
                }, "*");
            }, 0);
        });
    </script>
</body>
</html>
"""
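# Note on the bridge above: the component posts messages shaped like
#   {isStreamlitMessage: true, type: "streamlit:setComponentValue",
#    value: <transcript>, dataType: "json"}
# to its parent frame. Plain st.components.v1.html() renders markup but cannot
# return such values to Python; that round trip needs a bidirectional custom
# component, which is why main() below type-checks the component's return
# value before trusting it.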
# Main execution
def main():
    NODE_NAME, port = get_node_name()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    async def async_interface():
        if 'username' not in st.session_state:
            chat_content = await load_chat()
            available_names = [name for name in FUN_USERNAMES if not any(f"{name} has joined" in line for line in chat_content.split('\n'))]
            st.session_state.username = random.choice(available_names) if available_names else random.choice(list(FUN_USERNAMES.keys()))
            st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
            st.markdown(f"**🎙️ Voice**: {st.session_state.tts_voice} 🗣️ for {st.session_state.username}")

        st.title(f"🤖🧠MMO Chat & Research for {st.session_state.username}📝🔬")
        st.markdown(f"Welcome to {START_ROOM} - chat, research, upload, and more! 🎉")

        if not st.session_state.server_task:
            st.session_state.server_task = loop.create_task(run_websocket_server())

        # Tabs
        tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True)
        useArxiv = st.checkbox("Search Arxiv", value=True)
        useArxivAudio = st.checkbox("Generate Arxiv Audio", value=False)

        # Chat & Voice tab
        if tab_main == "🎤 Chat & Voice":
            st.subheader(f"{START_ROOM} Chat 💬")
            chat_content = await load_chat()
            chat_lines = chat_content.split('\n')
            for i, line in enumerate(chat_lines):
                if line.strip() and ': ' in line:
                    st.markdown(line)
                    if st.button("📢 Speak", key=f"speak_{i}"):
                        audio_file = await async_edge_tts_generate(clean_text_for_tts(line.split(': ', 1)[1]), st.session_state.tts_voice)
                        play_and_download_audio(audio_file)

            message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
            if st.button("Send 🚀") and message.strip():
                await save_chat_entry(st.session_state.username, message, is_markdown=True)
                st.rerun()

            st.subheader("🎤 Continuous Speech Input")
            # components.html returns None; the guard below only fires if this is
            # swapped for a bidirectional custom component (see note above)
            asr_component = components.html(ASR_HTML, height=400)
            if asr_component and isinstance(asr_component, dict) and 'value' in asr_component:
                transcript = asr_component['value'].strip()
                if transcript and transcript != st.session_state.last_transcript:
                    await save_chat_entry(st.session_state.username, transcript, is_markdown=True)
                    st.session_state.last_transcript = transcript
                    st.rerun()

        # Media tab with galleries
        elif tab_main == "📸 Media":
            st.header("📸 Media Gallery")
            tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
            with tabs[0]:
                st.subheader("🎵 Audio Files")
                audio_files = glob.glob(f"{MEDIA_DIR}/*.mp3")
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        play_and_download_audio(a)
            with tabs[1]:
                st.subheader("🖼 Images")
                imgs = glob.glob(f"{MEDIA_DIR}/*.png") + glob.glob(f"{MEDIA_DIR}/*.jpg")
                if imgs:
                    cols = st.columns(3)
                    for i, f in enumerate(imgs):
                        with cols[i % 3]:
                            st.image(f, use_container_width=True)
            with tabs[2]:
                st.subheader("🎥 Videos")
                vids = glob.glob(f"{MEDIA_DIR}/*.mp4")
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)

            uploaded_file = st.file_uploader("Upload Media", type=['png', 'jpg', 'mp4', 'mp3'])
            if uploaded_file:
                timestamp = format_timestamp_prefix(st.session_state.username)
                ext = uploaded_file.name.split('.')[-1]
                file_hash = hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]
                filename = f"{timestamp}-{file_hash}.{ext}"
                file_path = os.path.join(MEDIA_DIR, filename)
                with open(file_path, 'wb') as f:
                    f.write(uploaded_file.getbuffer())
                await save_chat_entry(st.session_state.username, f"Uploaded media: {file_path}")
                st.rerun()

        # ArXiv tab
        elif tab_main == "🔍 ArXiv":
            st.subheader("🔍 Query ArXiv")
            q = st.text_input("🔍 Query:")
            if q and st.button("🔍 Run"):
                result, papers = perform_ai_lookup(q, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
                for paper in papers:
                    with st.expander(paper['title']):
                        st.markdown(f"**Summary**: {paper['summary']}")
                        if paper['full_audio']:
                            play_and_download_audio(paper['full_audio'])

        # PDF to Audio tab
        elif tab_main == "📚 PDF to Audio":
            st.subheader("📚 PDF to Audio Converter")
            audio_processor = AudioProcessor()
            uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
            max_pages = st.slider('Pages to process', 1, 100, 10)
            if uploaded_file:
                with st.spinner('Processing PDF...'):
                    texts, audios, total_pages = process_pdf(uploaded_file, max_pages, st.session_state.tts_voice, audio_processor)
                for i, text in enumerate(texts):
                    with st.expander(f"Page {i+1}"):
                        st.markdown(text)
                        # Poll for this page's audio, but give up after 30s so a
                        # failed synthesis thread cannot hang the UI forever
                        deadline = time.time() + 30
                        while i not in audios and time.time() < deadline:
                            time.sleep(0.1)
                        if audios.get(i):
                            st.audio(audios[i], format='audio/mp3')
                            st.markdown(get_download_link(audios[i], f'page_{i+1}.mp3', len(audios[i]) / (1024 * 1024)), unsafe_allow_html=True)

        # Sidebar
        st.sidebar.subheader("Voice Settings")
        new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
        if new_username != st.session_state.username:
            await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
            st.session_state.username = new_username
            st.session_state.tts_voice = FUN_USERNAMES[new_username]
            st.rerun()

    loop.run_until_complete(async_interface())

if __name__ == "__main__":
    main()