Spaces:

awacke1
/

SectorMultiplayerChatServer

Sleeping

App Files Files Community

awacke1 committed on Mar 2

Commit

e987f4a

verified ·

1 Parent(s): b020576

Update app.py

Browse files

Files changed (1) hide show

app.py +336 -404

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import streamlit as st
 import asyncio
 import websockets
 import uuid
 import argparse
-from datetime import datetime
 import os
 import random
 import time
@@ -17,7 +17,6 @@ import edge_tts
 from audio_recorder_streamlit import audio_recorder
 import nest_asyncio
 import re
-from streamlit_paste_button import paste_image_button
 import pytz
 import shutil
 import anthropic
@@ -29,23 +28,24 @@ import zipfile
 from gradio_client import Client
 from dotenv import load_dotenv
 from streamlit_marquee import streamlit_marquee
-# Patch for nested async
 nest_asyncio.apply()
-# Static config
-icons = '🤖🧠🔬📝'
-START_ROOM = "Sector 🌌"
-# Page setup
 st.set_page_config(
-    page_title="🤖🧠MMO Chat & Research Brain📝🔬",
-    page_icon=icons,
     layout="wide",
     initial_sidebar_state="auto"
 )
-# Funky usernames with voices
 FUN_USERNAMES = {
     "CosmicJester 🌌": "en-US-AriaNeural",
     "PixelPanda 🐼": "en-US-JennyNeural",
@@ -57,481 +57,413 @@ FUN_USERNAMES = {
     "GalacticGopher 🌍": "en-AU-WilliamNeural",
     "RocketRaccoon 🚀": "en-CA-LiamNeural",
     "EchoElf 🧝": "en-US-AnaNeural",
-    "PhantomFox 🦊": "en-US-BrandonNeural",
-    "WittyWizard 🧙": "en-GB-ThomasNeural",
-    "LunarLlama 🌙": "en-AU-FreyaNeural",
-    "SolarSloth ☀️": "en-CA-LindaNeural",
-    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
-    "CyberCoyote 🐺": "en-GB-ElliotNeural",
-    "MysticMoose 🦌": "en-AU-JamesNeural",
-    "GlitchGnome 🧚": "en-CA-EthanNeural",
-    "VortexViper 🐍": "en-US-AmberNeural",
-    "ChronoChimp 🐒": "en-GB-LibbyNeural"
 }
-# Directories
-CHAT_DIR = "chat_logs"
-VOTE_DIR = "vote_logs"
-AUDIO_DIR = "audio_logs"
-HISTORY_DIR = "history_logs"
 MEDIA_DIR = "media_files"
-os.makedirs(CHAT_DIR, exist_ok=True)
-os.makedirs(VOTE_DIR, exist_ok=True)
-os.makedirs(AUDIO_DIR, exist_ok=True)
-os.makedirs(HISTORY_DIR, exist_ok=True)
-os.makedirs(MEDIA_DIR, exist_ok=True)
-CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
-QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
-MEDIA_VOTES_FILE = os.path.join(VOTE_DIR, "media_votes.md")
-HISTORY_FILE = os.path.join(HISTORY_DIR, "chat_history.md")
-# Unicode digits
-UNICODE_DIGITS = {i: f"{i}\uFE0F⃣" for i in range(10)}
-# Unicode fonts (simplified for brevity)
-UNICODE_FONTS = [
-    ("Normal", lambda x: x),
-    ("Bold", lambda x: "".join(chr(ord(c) + 0x1D400 - 0x41) if 'A' <= c <= 'Z' else chr(ord(c) + 0x1D41A - 0x61) if 'a' <= c <= 'z' else c for c in x)),
-    # Add other font styles as needed...
-]
-# Global state
-if 'server_running' not in st.session_state:
-    st.session_state.server_running = False
-if 'server_task' not in st.session_state:
-    st.session_state.server_task = None
-if 'active_connections' not in st.session_state:
-    st.session_state.active_connections = {}
-if 'media_notifications' not in st.session_state:
-    st.session_state.media_notifications = []
-if 'last_chat_update' not in st.session_state:
-    st.session_state.last_chat_update = 0
-if 'displayed_chat_lines' not in st.session_state:
-    st.session_state.displayed_chat_lines = []
-if 'message_text' not in st.session_state:
-    st.session_state.message_text = ""
-if 'audio_cache' not in st.session_state:
-    st.session_state.audio_cache = {}
-if 'pasted_image_data' not in st.session_state:
-    st.session_state.pasted_image_data = None
-if 'quote_line' not in st.session_state:
-    st.session_state.quote_line = None
-if 'refresh_rate' not in st.session_state:
-    st.session_state.refresh_rate = 5
-if 'base64_cache' not in st.session_state:
-    st.session_state.base64_cache = {}
-if 'transcript_history' not in st.session_state:
-    st.session_state.transcript_history = []
-if 'last_transcript' not in st.session_state:
-    st.session_state.last_transcript = ""
-if 'image_hashes' not in st.session_state:
-    st.session_state.image_hashes = set()
-if 'tts_voice' not in st.session_state:
-    st.session_state.tts_voice = "en-US-AriaNeural"
-if 'chat_history' not in st.session_state:
-    st.session_state.chat_history = []
-# API Keys
 load_dotenv()
-anthropic_key = os.getenv('ANTHROPIC_API_KEY', "")
-openai_api_key = os.getenv('OPENAI_API_KEY', "")
-if 'ANTHROPIC_API_KEY' in st.secrets:
-    anthropic_key = st.secrets['ANTHROPIC_API_KEY']
-if 'OPENAI_API_KEY' in st.secrets:
-    openai_api_key = st.secrets['OPENAI_API_KEY']
 openai_client = openai.OpenAI(api_key=openai_api_key)
-# Timestamp formatting
-def format_timestamp_prefix(username):
     central = pytz.timezone('US/Central')
     now = datetime.now(central)
-    return f"{now.strftime('%I-%M-%p-ct-%m-%d-%Y')}-by-{username}"
-# Image hash computation
-def compute_image_hash(image_data):
-    if isinstance(image_data, Image.Image):
-        img_byte_arr = io.BytesIO()
-        image_data.save(img_byte_arr, format='PNG')
-        img_bytes = img_byte_arr.getvalue()
-    else:
-        img_bytes = image_data
-    return hashlib.md5(img_bytes).hexdigest()[:8]
-# Node naming
-def get_node_name():
-    parser = argparse.ArgumentParser(description='Start a chat node')
-    parser.add_argument('--node-name', type=str, default=None)
-    parser.add_argument('--port', type=int, default=8501)
-    args = parser.parse_args()
-    return args.node_name or f"node-{uuid.uuid4().hex[:8]}", args.port
-# Action logger
-def log_action(username, action):
-    if 'action_log' not in st.session_state:
-        st.session_state.action_log = {}
-    user_log = st.session_state.action_log.setdefault(username, {})
-    current_time = time.time()
-    user_log = {k: v for k, v in user_log.items() if current_time - v < 10}
-    st.session_state.action_log[username] = user_log
-    if action not in user_log:
-        central = pytz.timezone('US/Central')
-        with open(HISTORY_FILE, 'a') as f:
-            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] {username}: {action}\n")
-        user_log[action] = current_time
-# Text cleaning for TTS
-def clean_text_for_tts(text):
-    cleaned = re.sub(r'[#*!\[\]]+', '', text)
-    cleaned = ' '.join(cleaned.split())
-    return cleaned[:200] if cleaned else "No text to speak"
-# Chat saver
 async def save_chat_entry(username, message, is_markdown=False):
-    await asyncio.to_thread(log_action, username, "💬🔒 - Chat saver")
     central = pytz.timezone('US/Central')
     timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
-    if is_markdown:
-        entry = f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
-    else:
-        entry = f"[{timestamp}] {username}: {message}"
-    await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"{entry}\n"))
     voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
-    cleaned_message = clean_text_for_tts(message)
-    audio_file = await async_edge_tts_generate(cleaned_message, voice)
     if audio_file:
-        with open(HISTORY_FILE, 'a') as f:
-            f.write(f"[{timestamp}] {username}: Audio generated - {audio_file}\n")
     await broadcast_message(f"{username}|{message}", "chat")
     st.session_state.last_chat_update = time.time()
     return audio_file
-# Chat loader
 async def load_chat():
-    username = st.session_state.get('username', 'System 🌟')
-    await asyncio.to_thread(log_action, username, "📜🚀 - Chat loader")
     if not os.path.exists(CHAT_FILE):
-        await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n"))
-    with open(CHAT_FILE, 'r') as f:
-        content = await asyncio.to_thread(f.read)
-    return content
-# Audio generator
-async def async_edge_tts_generate(text, voice, rate=0, pitch=0, file_format="mp3"):
-    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🎶🌟 - Audio maker")
-    timestamp = format_timestamp_prefix(st.session_state.get('username', 'System 🌟'))
-    filename = f"{timestamp}.{file_format}"
-    filepath = os.path.join(AUDIO_DIR, filename)
-    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
-    try:
-        await communicate.save(filepath)
-        return filepath if os.path.exists(filepath) else None
-    except edge_tts.exceptions.NoAudioReceived:
-        with open(HISTORY_FILE, 'a') as f:
-            central = pytz.timezone('US/Central')
-            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] Audio failed for '{text}'\n")
-        return None
-# Audio player
-def play_and_download_audio(file_path):
-    if file_path and os.path.exists(file_path):
-        st.audio(file_path)
-        if file_path not in st.session_state.base64_cache:
-            with open(file_path, "rb") as f:
-                b64 = base64.b64encode(f.read()).decode()
-            st.session_state.base64_cache[file_path] = b64
-        b64 = st.session_state.base64_cache[file_path]
-        dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file_path)}">🎵 Download {os.path.basename(file_path)}</a>'
-        st.markdown(dl_link, unsafe_allow_html=True)
-# Websocket handler
 async def websocket_handler(websocket, path):
-    username = st.session_state.get('username', 'System 🌟')
-    await asyncio.to_thread(log_action, username, "🌐🔗 - Websocket handler")
     try:
-        client_id = str(uuid.uuid4())
-        room_id = "chat"
-        st.session_state.active_connections.setdefault(room_id, {})[client_id] = websocket
-        chat_content = await load_chat()
-        username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
-        if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
-            await save_chat_entry(f"Client-{client_id}", f"{username} has joined {START_ROOM}!")
         async for message in websocket:
-            parts = message.split('|', 1)
-            if len(parts) == 2:
-                username, content = parts
-                await save_chat_entry(username, content)
-    except websockets.ConnectionClosed:
-        pass
     finally:
         if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
             del st.session_state.active_connections[room_id][client_id]
-# Message broadcaster
 async def broadcast_message(message, room_id):
-    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "📢✈️ - Message broadcaster")
     if room_id in st.session_state.active_connections:
         disconnected = []
         for client_id, ws in st.session_state.active_connections[room_id].items():
-            try:
-                await ws.send(message)
-            except websockets.ConnectionClosed:
-                disconnected.append(client_id)
-        for client_id in disconnected:
-            del st.session_state.active_connections[room_id][client_id]
-# Server starter
 async def run_websocket_server():
-    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🖥️🌀 - Server starter")
     if not st.session_state.server_running:
         server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
         st.session_state.server_running = True
         await server.wait_closed()
-# PDF to Audio Processor
 class AudioProcessor:
     def __init__(self):
-        self.cache_dir = "audio_cache"
         os.makedirs(self.cache_dir, exist_ok=True)
-        self.metadata = self._load_metadata()
-    def _load_metadata(self):
-        metadata_file = os.path.join(self.cache_dir, "metadata.json")
-        return json.load(open(metadata_file)) if os.path.exists(metadata_file) else {}
     def _save_metadata(self):
-        metadata_file = os.path.join(self.cache_dir, "metadata.json")
-        with open(metadata_file, 'w') as f:
-            json.dump(self.metadata, f)
     async def create_audio(self, text, voice='en-US-AriaNeural'):
         cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
-        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")
         if cache_key in self.metadata and os.path.exists(cache_path):
             return open(cache_path, 'rb').read()
-        text = text.replace("\n", " ").replace("</s>", " ").strip()
-        if not text:
-            return None
         communicate = edge_tts.Communicate(text, voice)
         await communicate.save(cache_path)
-        self.metadata[cache_key] = {
-            'timestamp': datetime.now().isoformat(),
-            'text_length': len(text),
-            'voice': voice
-        }
         self._save_metadata()
         return open(cache_path, 'rb').read()
-def get_download_link(bin_data, filename, size_mb=None):
-    b64 = base64.b64encode(bin_data).decode()
-    size_str = f"({size_mb:.1f} MB)" if size_mb else ""
-    return f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">📥 {filename} {size_str}</a>'
 def process_pdf(pdf_file, max_pages, voice, audio_processor):
     reader = PdfReader(pdf_file)
     total_pages = min(len(reader.pages), max_pages)
     texts, audios = [], {}
-    async def process_page(i, text):
-        audio_data = await audio_processor.create_audio(text, voice)
-        audios[i] = audio_data
     for i in range(total_pages):
         text = reader.pages[i].extract_text()
         texts.append(text)
         threading.Thread(target=lambda: asyncio.run(process_page(i, text))).start()
     return texts, audios, total_pages
-# AI Lookup
-def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
     client = anthropic.Anthropic(api_key=anthropic_key)
-    response = client.messages.create(
-        model="claude-3-sonnet-20240229",
-        max_tokens=1000,
-        messages=[{"role": "user", "content": q}]
-    )
     result = response.content[0].text
-    st.markdown("### Claude's reply 🧠:")
-    st.markdown(result)
     md_file = create_file(q, result)
-    audio_file = speak_with_edge_tts(result, st.session_state.tts_voice)
     play_and_download_audio(audio_file)
     if useArxiv:
         q += result
         gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
         refs = gradio_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
         result = f"🔎 {q}\n\n{refs}"
-        md_file, audio_file = save_qa_with_audio(q, result)
         play_and_download_audio(audio_file)
         papers = parse_arxiv_refs(refs)
-        if papers and useArxivAudio:
-            asyncio.run(create_paper_audio_files(papers, q))
         return result, papers
     return result, []
-def create_file(prompt, response, file_type="md"):
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename = f"{timestamp}_{clean_text_for_filename(prompt[:40] + ' ' + response[:40])}.{file_type}"
-    with open(filename, 'w', encoding='utf-8') as f:
-        f.write(prompt + "\n\n" + response)
-    return filename
-def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
-    result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
-    return result
-def save_qa_with_audio(question, answer, voice=None):
-    voice = voice or st.session_state.tts_voice
-    md_file = create_file(question, answer, "md")
-    audio_file = speak_with_edge_tts(f"{question}\n\nAnswer: {answer}", voice)
-    return md_file, audio_file
-def clean_text_for_filename(text):
-    text = text.lower()
-    text = re.sub(r'[^\w\s-]', '', text)
-    return '_'.join(text.split())[:200]
-def parse_arxiv_refs(ref_text):
-    return [{"title": line.strip(), "url": "", "authors": "", "summary": "", "full_audio": None, "download_base64": ""} for line in ref_text.split('\n') if line.strip()]
-async def create_paper_audio_files(papers, input_question):
-    for paper in papers:
-        audio_text = f"{paper['title']}"
-        audio_file = await async_edge_tts_generate(audio_text, st.session_state.tts_voice)
-        paper['full_audio'] = audio_file
-        if audio_file:
-            with open(audio_file, "rb") as f:
-                b64 = base64.b64encode(f.read()).decode()
-            paper['download_base64'] = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(audio_file)}">🎵 Download</a>'
-# Main execution
-def main():
-    NODE_NAME, port = get_node_name()
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    async def async_interface():
-        if 'username' not in st.session_state:
-            chat_content = await load_chat()
-            available_names = [name for name in FUN_USERNAMES if not any(f"{name} has joined" in line for line in chat_content.split('\n'))]
-            st.session_state.username = random.choice(available_names) if available_names else random.choice(list(FUN_USERNAMES.keys()))
-            st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
-            st.markdown(f"**🎙️ Voice**: {st.session_state.tts_voice} 🗣️ for {st.session_state.username}")
-        st.title(f"🤖🧠MMO Chat & Research for {st.session_state.username}📝🔬")
-        st.markdown(f"Welcome to {START_ROOM} - chat, research, upload, and more! 🎉")
-        if not st.session_state.server_task:
-            st.session_state.server_task = loop.create_task(run_websocket_server())
-        # Tabs
-        tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True)
-        useArxiv = st.checkbox("Search Arxiv", value=True)
-        useArxivAudio = st.checkbox("Generate Arxiv Audio", value=False)
-        # Chat & Voice Tab
-        if tab_main == "🎤 Chat & Voice":
-            st.subheader(f"{START_ROOM} Chat 💬")
-            chat_content = await load_chat()
-            chat_lines = chat_content.split('\n')
-            for i, line in enumerate(chat_lines):
-                if line.strip() and ': ' in line:
-                    st.markdown(line)
-                    if st.button("📢 Speak", key=f"speak_{i}"):
-                        audio_file = await async_edge_tts_generate(clean_text_for_tts(line.split(': ', 1)[1]), st.session_state.tts_voice)
-                        play_and_download_audio(audio_file)
-            message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
-            if st.button("Send 🚀") and message.strip():
-                await save_chat_entry(st.session_state.username, message, is_markdown=True)
-                st.rerun()
-            # Speech intake using the custom component
-            st.subheader("🎤 Continuous Speech Input")
-            from mycomponent import speech_component  # Import the component
-            transcript_data = speech_component(default_value=st.session_state.get('last_transcript', ''))
-            if transcript_data and 'value' in transcript_data:
-                transcript = transcript_data['value'].strip()
-                if transcript and transcript != st.session_state.last_transcript:
-                    await save_chat_entry(st.session_state.username, transcript, is_markdown=True)
-                    st.session_state.last_transcript = transcript
-                    st.rerun()
-        # Media Tab with Galleries
-        elif tab_main == "📸 Media":
-            st.header("📸 Media Gallery")
-            tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
-            with tabs[0]:
-                st.subheader("🎵 Audio Files")
-                audio_files = glob.glob(f"{MEDIA_DIR}/*.mp3")
-                for a in audio_files:
-                    with st.expander(os.path.basename(a)):
-                        play_and_download_audio(a)
-            with tabs[1]:
-                st.subheader("🖼 Images")
-                imgs = glob.glob(f"{MEDIA_DIR}/*.png") + glob.glob(f"{MEDIA_DIR}/*.jpg")
-                if imgs:
-                    cols = st.columns(3)
-                    for i, f in enumerate(imgs):
-                        with cols[i % 3]:
-                            st.image(f, use_container_width=True)
-            with tabs[2]:
-                st.subheader("🎥 Videos")
-                vids = glob.glob(f"{MEDIA_DIR}/*.mp4")
-                for v in vids:
-                    with st.expander(os.path.basename(v)):
-                        st.video(v)
-            uploaded_file = st.file_uploader("Upload Media", type=['png', 'jpg', 'mp4', 'mp3'])
-            if uploaded_file:
-                timestamp = format_timestamp_prefix(st.session_state.username)
-                ext = uploaded_file.name.split('.')[-1]
-                file_hash = hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]
-                filename = f"{timestamp}-{file_hash}.{ext}"
-                file_path = os.path.join(MEDIA_DIR, filename)
-                with open(file_path, 'wb') as f:
-                    f.write(uploaded_file.getbuffer())
-                await save_chat_entry(st.session_state.username, f"Uploaded media: {file_path}")
                 st.rerun()
-        # ArXiv Tab
-        elif tab_main == "🔍 ArXiv":
-            st.subheader("🔍 Query ArXiv")
-            q = st.text_input("🔍 Query:")
-            if q and st.button("🔍 Run"):
-                result, papers = perform_ai_lookup(q, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
-                for paper in papers:
-                    with st.expander(paper['title']):
-                        st.markdown(f"**Summary**: {paper['summary']}")
-                        if paper['full_audio']:
-                            play_and_download_audio(paper['full_audio'])
-        # PDF to Audio Tab
-        elif tab_main == "📚 PDF to Audio":
-            st.subheader("📚 PDF to Audio Converter")
-            audio_processor = AudioProcessor()
-            uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
-            max_pages = st.slider('Pages to process', 1, 100, 10)
-            if uploaded_file:
-                with st.spinner('Processing PDF...'):
-                    texts, audios, total_pages = process_pdf(uploaded_file, max_pages, st.session_state.tts_voice, audio_processor)
-                    for i, text in enumerate(texts):
-                        with st.expander(f"Page {i+1}"):
-                            st.markdown(text)
-                            while i not in audios:
-                                time.sleep(0.1)
-                            if audios[i]:
-                                st.audio(audios[i], format='audio/mp3')
-                                st.markdown(get_download_link(audios[i], f'page_{i+1}.mp3', len(audios[i]) / (1024 * 1024)), unsafe_allow_html=True)
-        # Sidebar
-        st.sidebar.subheader("Voice Settings")
-        new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
-        if new_username != st.session_state.username:
-            await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
-            st.session_state.username = new_username
-            st.session_state.tts_voice = FUN_USERNAMES[new_username]
             st.rerun()
-    loop.run_until_complete(async_interface())
 if __name__ == "__main__":
     main()

+# 🚀 Main App - TalkingAIResearcher with Chat, Voice, Media, ArXiv, and More
 import streamlit as st
 import asyncio
 import websockets
 import uuid
 import argparse
 import os
 import random
 import time
 from audio_recorder_streamlit import audio_recorder
 import nest_asyncio
 import re
 import pytz
 import shutil
 import anthropic
 from gradio_client import Client
 from dotenv import load_dotenv
 from streamlit_marquee import streamlit_marquee
+from datetime import datetime
+from collections import defaultdict, Counter
+import pandas as pd
+# 🛠️ Patch asyncio for nesting glory
 nest_asyncio.apply()
+# 🎨 Page Config
 st.set_page_config(
+    page_title="🚲TalkingAIResearcher🏆",
+    page_icon="🚲🏆",
     layout="wide",
     initial_sidebar_state="auto"
 )
+# 🌟 Static Config
+icons = '🤖🧠🔬📝'
+START_ROOM = "Sector 🌌"
 FUN_USERNAMES = {
     "CosmicJester 🌌": "en-US-AriaNeural",
     "PixelPanda 🐼": "en-US-JennyNeural",
     "GalacticGopher 🌍": "en-AU-WilliamNeural",
     "RocketRaccoon 🚀": "en-CA-LiamNeural",
     "EchoElf 🧝": "en-US-AnaNeural",
 }
# 🎙️ Voice options: de-duplicated, then sorted so the order is the same in every
# process (plain set iteration order for strings varies with hash randomisation).
EDGE_TTS_VOICES = sorted(set(FUN_USERNAMES.values()))
# Icon shown next to a download link, keyed by file extension.
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊"}
# 📁 Directories - created at import time so later file writes cannot fail on a missing folder
for d in ["chat_logs", "vote_logs", "audio_logs", "history_logs", "media_files", "audio_cache"]:
    os.makedirs(d, exist_ok=True)
CHAT_FILE = "chat_logs/global_chat.md"  # shared chat transcript
HISTORY_FILE = "history_logs/chat_history.md"  # log of generated audio files
MEDIA_DIR = "media_files"
AUDIO_CACHE_DIR = "audio_cache"
# 🔑 API Keys
load_dotenv()  # load variables from a local .env file into the environment first


def _lookup_api_key(name):
    """Return the key from the environment, falling back to Streamlit secrets, then ""."""
    # The secrets lookup is evaluated unconditionally, exactly as a default argument would be.
    fallback = st.secrets.get(name, "")
    return os.getenv(name, fallback)


anthropic_key = _lookup_api_key('ANTHROPIC_API_KEY')
openai_api_key = _lookup_api_key('OPENAI_API_KEY')
openai_client = openai.OpenAI(api_key=openai_api_key)
# 🕒 Timestamp Helper
def format_timestamp_prefix(username=""):
    """Return ``YYYYmmdd_HHMMSS-by-<username>`` using the current US/Central time.

    With the default empty ``username`` the result still ends in ``-by-``.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime('%Y%m%d_%H%M%S')
    return f"{stamp}-by-{username}"
# 📈 Performance Timer
class PerformanceTimer:
    """Context manager that measures the wall-clock duration of the wrapped block.

    On exit the elapsed seconds are written to
    ``st.session_state['operation_timings'][name]`` (latest value) and appended to
    ``st.session_state['performance_metrics'][name]`` (history). Exceptions raised
    inside the block are not suppressed.
    """

    def __init__(self, name):
        self.name = name
        self.start = None  # set when the context is entered

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        elapsed = time.time() - self.start
        state = st.session_state
        state['operation_timings'][self.name] = elapsed
        state['performance_metrics'][self.name].append(elapsed)
# 🎛️ Session State Init
def init_session_state():
    """Seed ``st.session_state`` with a default for every key that is not set yet.

    Keys that already exist are left untouched, so calling this repeatedly is safe.
    """
    marquee_defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
    defaults = {
        'server_running': False,
        'server_task': None,
        'active_connections': {},
        'media_notifications': [],
        'last_chat_update': 0,
        'displayed_chat_lines': [],
        'message_text': "",
        'audio_cache': {},
        'pasted_image_data': None,
        'quote_line': None,
        'refresh_rate': 5,
        'base64_cache': {},
        'transcript_history': [],
        'last_transcript': "",
        'image_hashes': set(),
        'tts_voice': "en-US-AriaNeural",
        'chat_history': [],
        'marquee_settings': marquee_defaults,
        'operation_timings': {},
        'performance_metrics': defaultdict(list),
        'enable_audio': True,
        'download_link_cache': {},
        'username': None,
    }
    for key, value in defaults.items():
        if key in st.session_state:
            continue
        st.session_state[key] = value
# 🖌️ Marquee Helpers
def update_marquee_settings_ui():
    """Render the sidebar marquee controls and copy their values into session state.

    Writes ``background``, ``color``, ``font-size`` and ``animationDuration`` of
    ``st.session_state['marquee_settings']`` from two colour pickers and two sliders.
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left, right = st.sidebar.columns(2)
    with left:
        background = st.color_picker("🎨 Background", "#1E1E1E")
        st.session_state['marquee_settings']['background'] = background
        text_color = st.color_picker("✍️ Text", "#FFFFFF")
        st.session_state['marquee_settings']['color'] = text_color
    with right:
        size = st.slider('📏 Size', 10, 24, 14)
        st.session_state['marquee_settings']['font-size'] = f"{size}px"
        speed = st.slider('⏱️ Speed', 1, 20, 20)
        st.session_state['marquee_settings']['animationDuration'] = f"{speed}s"
def display_marquee(text, settings, key_suffix=""):
    """Show ``text`` in a scrolling marquee, cut to 280 characters plus ``...`` if longer.

    ``settings`` is expanded into keyword arguments of ``streamlit_marquee``;
    ``key_suffix`` makes the widget key ``marquee_<key_suffix>``.
    """
    if len(text) > 280:
        shown = text[:280] + "..."
    else:
        shown = text
    streamlit_marquee(content=shown, **settings, key=f"marquee_{key_suffix}")
    st.write("")  # empty element written after the marquee
# 📝 Text & File Helpers
def clean_text_for_tts(text):
    """Return ``text`` prepared for speech synthesis.

    Markdown punctuation (``# * ! [ ]``) is removed first and whitespace is
    collapsed afterwards, so deleting a symbol can never leave a double space
    or a whitespace-only result. The output is capped at 200 characters; if
    nothing speakable remains, ``"No text"`` is returned.
    """
    without_markup = re.sub(r'[#*!\[\]]+', '', text)
    collapsed = ' '.join(without_markup.split())
    return collapsed[:200] or "No text"
def clean_text_for_filename(text):
    """Turn ``text`` into a lowercase, underscore-separated slug of at most 200 characters.

    Everything except word characters, whitespace and hyphens is dropped, then
    runs of whitespace become single underscores.
    """
    lowered = text.lower()
    kept = re.sub(r'[^\w\s-]', '', lowered)
    slug = '_'.join(kept.split())
    return slug[:200]
def get_high_info_terms(text, top_n=10):
    """Return the ``top_n`` most frequent single words and adjacent word pairs in ``text``.

    Text is lower-cased; hyphenated words count as one token. Single words that
    are stop words are ignored, word pairs are always counted. Ties keep
    first-seen order, with all single words ranked ahead of pairs.
    """
    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
    tokens = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    counts = Counter()
    # Single words are counted first so they precede pairs when counts are equal.
    for token in tokens:
        if token not in stop_words:
            counts[token] += 1
    for first, second in zip(tokens, tokens[1:]):
        counts[f"{first} {second}"] += 1
    return [term for term, _ in counts.most_common(top_n)]
def generate_filename(prompt, response, file_type="md"):
    """Build a descriptive file name for a prompt/response pair.

    Layout: ``<timestamp prefix><terms and snippet>_wct<N>_sw<M>_dur<D>.<file_type>``
    where ``wct``/``sw`` are the word counts of prompt and response and ``dur`` is
    their sum divided by 2.5, rounded. The middle part is trimmed so the whole
    name is at most 200 characters.
    """
    prefix = format_timestamp_prefix()
    terms = get_high_info_terms(prompt + " " + response, 5)
    snippet = clean_text_for_filename(prompt[:40] + " " + response[:40])
    wct = len(prompt.split())
    sw = len(response.split())
    dur = round((wct + sw) / 2.5)
    suffix = f"_wct{wct}_sw{sw}_dur{dur}.{file_type}"
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_parts = list(dict.fromkeys(terms + [snippet]))
    budget = 200 - len(prefix) - len(suffix)
    base = '_'.join(unique_parts)[:budget]
    return f"{prefix}{base}{suffix}"
def create_file(prompt, response, file_type="md"):
    """Write ``prompt``, a blank line and ``response`` to a new UTF-8 file; return its name.

    The name comes from ``generate_filename`` and the file is created in the
    current working directory.
    """
    target = generate_filename(prompt, response, file_type)
    body = prompt + "\n\n" + response
    with open(target, 'w', encoding='utf-8') as out:
        out.write(body)
    return target
def get_download_link(file, file_type="mp3"):
    """Return an HTML ``<a>`` tag that downloads ``file`` via a base64 data URI.

    The link is built once per path and kept in
    ``st.session_state['download_link_cache']`` under ``dl_<file>``, so the file
    is only read and encoded on the first request.

    ``file_type`` selects the emoji from ``FILE_EMOJIS`` (``⬇️`` when unknown).
    The data URI is always labelled ``audio/mpeg``.
    """
    cache = st.session_state['download_link_cache']
    cache_key = f"dl_{file}"
    if cache_key not in cache:
        with open(file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        # Computed outside the f-string: reusing the enclosing quote character
        # inside a replacement field is a SyntaxError before Python 3.12.
        name = os.path.basename(file)
        emoji = FILE_EMOJIS.get(file_type, '⬇️')
        cache[cache_key] = f'<a href="data:audio/mpeg;base64,{b64}" download="{name}">{emoji} Download {name}</a>'
    return cache[cache_key]
# 🎶 Audio Processing
async def async_edge_tts_generate(text, voice, rate=0, pitch=0, file_format="mp3"):
    """Synthesise ``text`` with edge-tts and return ``(audio_path, seconds_taken)``.

    Results are cached in ``st.session_state['audio_cache']``, keyed by the first
    100 characters of the incoming text plus voice, rate, pitch and format. A
    cache hit returns the stored path with a duration of ``0``.

    ``rate`` is a signed percentage and ``pitch`` a signed Hz offset; both are
    passed to ``edge_tts.Communicate`` as ``+N%`` / ``+NHz`` strings.
    Returns ``(None, 0)`` if no text is left after cleaning.
    """
    cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
    if cache_key in st.session_state['audio_cache']:
        return st.session_state['audio_cache'][cache_key], 0
    # Measured locally: the PerformanceTimer only publishes its duration in
    # __exit__, which runs after a return expression inside the block has been
    # evaluated, so it cannot be read from session state at this point.
    started = time.time()
    with PerformanceTimer("tts_generation"):
        text = clean_text_for_tts(text)
        if not text:
            return None, 0
        filename = f"audio_{format_timestamp_prefix()}_{random.randint(1000, 9999)}.{file_format}"
        communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
        await communicate.save(filename)
        st.session_state['audio_cache'][cache_key] = filename
    return filename, time.time() - started
def play_and_download_audio(file_path):
    """Render an audio player and a download link for ``file_path``.

    Does nothing when ``file_path`` is empty or the file does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    link_html = get_download_link(file_path)
    st.markdown(link_html, unsafe_allow_html=True)
 async def save_chat_entry(username, message, is_markdown=False):
+    # 💬 Save chat with audio
     central = pytz.timezone('US/Central')
     timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"[{timestamp}] {username}: {message}" if not is_markdown else f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
+    with open(CHAT_FILE, 'a') as f: f.write(f"{entry}\n")
     voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
+    audio_file, _ = await async_edge_tts_generate(clean_text_for_tts(message), voice)
     if audio_file:
+        with open(HISTORY_FILE, 'a') as f: f.write(f"[{timestamp}] {username}: Audio - {audio_file}\n")
     await broadcast_message(f"{username}|{message}", "chat")
     st.session_state.last_chat_update = time.time()
     return audio_file
 async def load_chat():
+    # 📜 Load chat history
     if not os.path.exists(CHAT_FILE):
+        with open(CHAT_FILE, 'a') as f: f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n")
+    with open(CHAT_FILE, 'r') as f: return f.read()
# 🌐 WebSocket Handling
async def websocket_handler(websocket, path=None):
    """Serve one WebSocket client for the lifetime of its connection.

    Registers the socket under room ``"chat"``, announces the join in the chat
    log, then stores every ``username|content`` text frame via ``save_chat_entry``.
    The socket is always removed from the registry when the handler ends.

    ``path`` is unused; it is optional so the handler works whether the
    websockets library calls it with one argument or two.
    """
    # 🤝 Handle WebSocket clients
    client_id = str(uuid.uuid4())
    room_id = "chat"
    st.session_state.active_connections.setdefault(room_id, {})[client_id] = websocket
    try:
        chat_content = await load_chat()
        # The session default for 'username' is None, so .get(key, default)
        # would return None; fall back explicitly to a random name.
        username = st.session_state.get('username') or random.choice(list(FUN_USERNAMES.keys()))
        if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
            await save_chat_entry(f"Client-{client_id}", f"{username} has joined {START_ROOM}!")
        async for message in websocket:
            # Skip binary frames and frames without the 'name|text' separator
            # instead of letting one bad frame end the connection.
            if not isinstance(message, str):
                continue
            sender, separator, content = message.partition('|')
            if not separator:
                continue
            await save_chat_entry(sender, content)
    except websockets.ConnectionClosed:
        # The client went away; nothing to do besides the cleanup below.
        pass
    finally:
        st.session_state.active_connections.get(room_id, {}).pop(client_id, None)
 async def broadcast_message(message, room_id):
+    # 📢 Broadcast to all clients
     if room_id in st.session_state.active_connections:
         disconnected = []
         for client_id, ws in st.session_state.active_connections[room_id].items():
+            try: await ws.send(message)
+            except websockets.ConnectionClosed: disconnected.append(client_id)
+        for client_id in disconnected: del st.session_state.active_connections[room_id][client_id]
 async def run_websocket_server():
+    # 🖥️ Start WebSocket server
     if not st.session_state.server_running:
         server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
         st.session_state.server_running = True
         await server.wait_closed()
+# 📚 PDF to Audio
 class AudioProcessor:
     def __init__(self):
+        self.cache_dir = AUDIO_CACHE_DIR
         os.makedirs(self.cache_dir, exist_ok=True)
+        self.metadata = json.load(open(f"{self.cache_dir}/metadata.json")) if os.path.exists(f"{self.cache_dir}/metadata.json") else {}
     def _save_metadata(self):
+        with open(f"{self.cache_dir}/metadata.json", 'w') as f: json.dump(self.metadata, f)
     async def create_audio(self, text, voice='en-US-AriaNeural'):
+        # 🎶 Generate cached audio
         cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
+        cache_path = f"{self.cache_dir}/{cache_key}.mp3"
         if cache_key in self.metadata and os.path.exists(cache_path):
             return open(cache_path, 'rb').read()
+        text = clean_text_for_tts(text)
+        if not text: return None
         communicate = edge_tts.Communicate(text, voice)
         await communicate.save(cache_path)
+        self.metadata[cache_key] = {'timestamp': datetime.now().isoformat(), 'text_length': len(text), 'voice': voice}
         self._save_metadata()
         return open(cache_path, 'rb').read()
 def process_pdf(pdf_file, max_pages, voice, audio_processor):
+    # 📄 Convert PDF to audio
     reader = PdfReader(pdf_file)
     total_pages = min(len(reader.pages), max_pages)
     texts, audios = [], {}
+    async def process_page(i, text): audios[i] = await audio_processor.create_audio(text, voice)
     for i in range(total_pages):
         text = reader.pages[i].extract_text()
         texts.append(text)
         threading.Thread(target=lambda: asyncio.run(process_page(i, text))).start()
     return texts, audios, total_pages
+# 🔍 ArXiv & AI Lookup
def parse_arxiv_refs(ref_text):
    """Parse the reference listing into a list of paper dicts (at most 20).

    A line containing exactly two ``|`` characters starts a new paper and is
    read as ``date | title | ...``. The first following line becomes the
    authors; every later non-blank line is appended to the summary.

    Args:
        ref_text: Reference text; ``None`` or an empty string yields ``[]``.

    Returns:
        Dicts with keys ``date``, ``title``, ``url``, ``authors``, ``summary``,
        ``full_audio`` and ``download_base64``. ``url`` falls back to
        ``paper_<n>`` when the header line carries no arxiv link.
    """
    # 📜 Parse ArXiv refs into dicts
    if not ref_text:
        return []
    url_pattern = re.compile(r'(https://arxiv\.org/\S+)')
    papers = []
    current = {}
    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            if current:
                papers.append(current)
            date, title, *_ = line.strip('* ').split('|')
            match = url_pattern.search(line)
            url = match.group(1) if match else f"paper_{len(papers)}"
            current = {
                'date': date.strip(),
                'title': title.strip(),
                'url': url,
                'authors': '',
                'summary': '',
                'full_audio': None,
                'download_base64': '',
            }
        elif current:
            stripped = line.strip()
            if not current['authors']:
                current['authors'] = line.strip('* ')
            elif stripped:
                # Skip blank lines so the summary is not padded with spaces.
                if current['summary']:
                    current['summary'] += ' ' + stripped
                else:
                    current['summary'] = stripped
    if current:
        papers.append(current)
    return papers[:20]
def generate_5min_feature_markdown(paper):
    """Build the markdown feature card for one parsed paper.

    Args:
        paper: Dict with ``title``, ``summary``, ``authors``, ``date`` and
            ``url`` keys (the shape produced by ``parse_arxiv_refs``).

    Returns:
        Markdown containing the header, links, key terms, a term-density
        percentage (labelled ROUGE) and a mermaid flowchart chaining the terms.
    """
    # ✨ 5-min research paper feature
    title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
    # Swap only the path segment; a bare replace("abs", "pdf") would also
    # rewrite any other "abs" substring in the URL.
    pdf_url = url.replace("/abs/", "/pdf/", 1)
    if not url.endswith(".pdf"):
        pdf_url += ".pdf"
    wct, sw = len(title.split()), len(summary.split())
    terms = get_high_info_terms(summary, 15)
    rouge = round((len(terms) / max(sw, 1)) * 100, 2)
    # A raw double quote would end the mermaid label early, so encode it.
    labels = [str(term).replace('"', '#quot;') for term in terms]
    edges = [
        f'    T{i+1}["{labels[i]}"] --> T{i+2}["{labels[i+1]}"]'
        for i in range(len(labels) - 1)
    ]
    mermaid = "```mermaid\nflowchart TD\n" + "\n".join(edges) + "\n```"
    return f"""
## 📄 {title}
**Authors:** {authors} | **Date:** {date} | **Words:** Title: {wct}, Summary: {sw}
**Links:** [Abstract]({url}) | [PDF]({pdf_url})
**Terms:** {', '.join(terms)} | **ROUGE:** {rouge}%
### 🎤 TTF Read Aloud
- **Title:** {title} | **Terms:** {', '.join(terms)} | **ROUGE:** {rouge}%
#### Concepts Graph
{mermaid}
---
"""
def create_detailed_paper_md(papers):
    """Concatenate the feature cards of ``papers`` under a single heading."""
    sections = [generate_5min_feature_markdown(paper) for paper in papers]
    return "# Detailed Summary\n" + "\n".join(sections)
async def create_paper_audio_files(papers, query):
    """Narrate each paper and attach the result to its dict in place.

    Sets ``full_audio`` on every paper and, when audio was produced,
    ``download_base64`` as well. ``query`` is accepted but not used.
    """
    # 🎧 Generate paper audio
    for paper in papers:
        narration = clean_text_for_tts(
            f"{paper['title']} by {paper['authors']}. {paper['summary']}"
        )
        audio_path, _elapsed = await async_edge_tts_generate(
            narration, st.session_state['tts_voice']
        )
        paper['full_audio'] = audio_path
        if paper['full_audio']:
            paper['download_base64'] = get_download_link(paper['full_audio'])
def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
    """Ask Claude about ``q``, voice the answer, and optionally search ArXiv.

    This is a synchronous function (the UI calls it without ``await``), so the
    TTS coroutines are driven with ``asyncio.run``. A bare ``await`` here is a
    SyntaxError that prevents the whole module from compiling. Calling
    ``asyncio.run`` while the page's event loop is already running relies on
    the ``nest_asyncio.apply()`` patch performed at import time.

    Args:
        q: The user's question.
        useArxiv: Also run the RAG paper search, seeded with question + answer.
        useArxivAudio: Additionally generate per-paper narration.

    Returns:
        ``(result, papers)`` where ``papers`` is ``[]`` when ``useArxiv`` is false.
    """
    # 🔮 AI-powered research
    client = anthropic.Anthropic(api_key=anthropic_key)
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}],
    )
    result = response.content[0].text
    st.markdown("### Claude's Reply 🧠\n" + result)
    create_file(q, result)
    audio_file, _ = asyncio.run(async_edge_tts_generate(result, st.session_state['tts_voice']))
    play_and_download_audio(audio_file)
    if not useArxiv:
        return result, []
    # The paper search is seeded with the question plus Claude's answer.
    q += result
    gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = gradio_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
    result = f"🔎 {q}\n\n{refs}"
    create_file(q, result)
    audio_file, _ = asyncio.run(async_edge_tts_generate(result, st.session_state['tts_voice']))
    play_and_download_audio(audio_file)
    papers = parse_arxiv_refs(refs)
    if papers and useArxivAudio:
        asyncio.run(create_paper_audio_files(papers, q))
    return result, papers
+# 📦 Zip Files
def create_zip_of_files(md_files, mp3_files, query):
    """Bundle the given markdown and MP3 files into a timestamped zip archive.

    The archive name embeds up to 20 characters of the high-information terms
    extracted from the markdown contents, the MP3 file names and ``query``.

    Returns:
        The zip file name, or ``None`` when there are no files to archive.
    """
    # 📦 Zip it up
    all_files = md_files + mp3_files
    if not all_files:
        return None
    text_parts = []
    for path in all_files:
        if path.endswith('.md'):
            # Context manager: the handle is closed as soon as it is read.
            with open(path, 'r', encoding='utf-8') as f:
                text_parts.append(f.read())
        else:
            stem = os.path.splitext(os.path.basename(path))[0]
            text_parts.append(stem.replace('_', ' '))
    text_parts.append(query)
    terms = get_high_info_terms(" ".join(text_parts), 5)
    zip_name = f"{format_timestamp_prefix()}_{'-'.join(terms)[:20]}.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for path in all_files:
            z.write(path)
    return zip_name
+# 🎮 Main Interface
async def _render_chat_tab():
    """Render the chat log, the message box and the speech-to-chat widget."""
    st.subheader(f"{START_ROOM} Chat 💬")
    chat_content = await load_chat()
    for i, line in enumerate(chat_content.split('\n')):
        if line.strip() and ': ' in line:
            st.markdown(line)
            if st.button("📢 Speak", key=f"speak_{i}"):
                # Voice only the text after the "[timestamp] user: " prefix.
                audio_file, _ = await async_edge_tts_generate(line.split(': ', 1)[1], st.session_state['tts_voice'])
                play_and_download_audio(audio_file)
    message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
    if st.button("Send 🚀") and message.strip():
        await save_chat_entry(st.session_state.username, message, True)
        st.rerun()
    st.subheader("🎤 Speech-to-Chat")
    speech_component = components.declare_component("speech_component", path="mycomponent")
    transcript_data = speech_component(default_value=st.session_state.get('last_transcript', ''))
    if transcript_data and 'value' in transcript_data:
        transcript = transcript_data['value'].strip()
        st.write(f"🎙️ You said: {transcript}")
        if st.button("Send to Chat"):
            await save_chat_entry(st.session_state.username, transcript, True)
            st.session_state.last_transcript = transcript
            st.rerun()


async def _render_media_tab():
    """Render the media gallery and handle a new upload."""
    st.header("📸 Media Gallery")
    tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
    with tabs[0]:
        for a in glob.glob(f"{MEDIA_DIR}/*.mp3"):
            with st.expander(os.path.basename(a)):
                play_and_download_audio(a)
    with tabs[1]:
        imgs = glob.glob(f"{MEDIA_DIR}/*.png") + glob.glob(f"{MEDIA_DIR}/*.jpg")
        if imgs:
            cols = st.columns(3)
            for i, f in enumerate(imgs):
                cols[i % 3].image(f, use_container_width=True)
    with tabs[2]:
        for v in glob.glob(f"{MEDIA_DIR}/*.mp4"):
            with st.expander(os.path.basename(v)):
                st.video(v)
    uploaded_file = st.file_uploader("Upload Media", type=['png', 'jpg', 'mp4', 'mp3'])
    if uploaded_file:
        # Stored name: timestamp prefix + short content hash + original extension.
        digest = hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]
        extension = uploaded_file.name.split('.')[-1]
        filename = f"{format_timestamp_prefix(st.session_state.username)}-{digest}.{extension}"
        with open(f"{MEDIA_DIR}/{filename}", 'wb') as f:
            f.write(uploaded_file.getbuffer())
        await save_chat_entry(st.session_state.username, f"Uploaded: {filename}")
        st.rerun()


def _render_arxiv_tab(useArxiv, useArxivAudio):
    """Render the research query box and the resulting paper cards."""
    q = st.text_input("🔍 Query:")
    if q and st.button("🔍 Run"):
        result, papers = perform_ai_lookup(q, useArxiv, useArxivAudio)
        for i, p in enumerate(papers, 1):
            with st.expander(f"{i}. 📄 {p['title']}"):
                st.markdown(f"**{p['date']} | {p['title']}** — [Link]({p['url']})")
                st.markdown(generate_5min_feature_markdown(p))
                if p.get('full_audio'):
                    play_and_download_audio(p['full_audio'])


def _render_pdf_tab(wait_seconds=120.0):
    """Render the PDF uploader and per-page text plus audio.

    ``wait_seconds`` bounds how long each page waits for its background audio,
    so a stalled or crashed worker cannot hang the page forever.
    """
    audio_processor = AudioProcessor()
    pdf_file = st.file_uploader("Choose PDF", "pdf")
    max_pages = st.slider('Pages', 1, 100, 10)
    if pdf_file:
        with st.spinner('Processing...'):
            texts, audios, total = process_pdf(pdf_file, max_pages, st.session_state['tts_voice'], audio_processor)
            for i, text in enumerate(texts):
                with st.expander(f"Page {i+1}"):
                    st.markdown(text)
                    deadline = time.monotonic() + wait_seconds
                    while i not in audios and time.monotonic() < deadline:
                        time.sleep(0.1)
                    page_audio = audios.get(i)
                    if page_audio:
                        st.audio(page_audio, format='audio/mp3')
                        # NOTE(review): elsewhere get_download_link receives a file
                        # path; confirm that it also accepts a BytesIO object.
                        st.markdown(get_download_link(io.BytesIO(page_audio), "mp3"), unsafe_allow_html=True)
                    elif i not in audios:
                        st.warning(f"Audio for page {i+1} was not ready in time.")


async def _render_sidebar():
    """Render the voice selector, recent file history and the zip button."""
    st.sidebar.subheader("Voice Settings")
    names = list(FUN_USERNAMES.keys())
    current = st.session_state.username
    # A username that is not in FUN_USERNAMES (for example one restored from
    # an older session) must not crash the page; fall back to the first entry.
    current_index = names.index(current) if current in names else 0
    new_username = st.sidebar.selectbox("Change Name/Voice", names, index=current_index)
    if new_username != st.session_state.username:
        await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
        st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
        st.rerun()
    md_files, mp3_files = glob.glob("*.md"), glob.glob("*.mp3")
    st.sidebar.markdown("### 📂 File History")
    for f in sorted(md_files + mp3_files, key=os.path.getmtime, reverse=True)[:10]:
        st.sidebar.write(f"{FILE_EMOJIS.get(f.split('.')[-1], '📄')} {os.path.basename(f)}")
    if st.sidebar.button("⬇️ Zip All"):
        zip_name = create_zip_of_files(md_files, mp3_files, "latest_query")
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)


async def async_interface():
    """Render one pass of the Streamlit page.

    Order matters for layout: session setup, header and marquee, WebSocket
    server start-up, the action selector, the selected tab, then the sidebar.
    """
    init_session_state()
    if not st.session_state.username:
        # Prefer a name that has not yet announced itself in the chat log.
        chat_lines = (await load_chat()).split('\n')
        available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in chat_lines)]
        st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
        st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
    st.title(f"🤖🧠MMO Chat & Research for {st.session_state.username}📝🔬")
    update_marquee_settings_ui()
    display_marquee(f"🚀 Welcome to {START_ROOM} | 🤖 {st.session_state.username}", st.session_state['marquee_settings'], "welcome")
    if not st.session_state.server_task:
        st.session_state.server_task = asyncio.create_task(run_websocket_server())
    tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True)
    useArxiv, useArxivAudio = st.checkbox("Search ArXiv", True), st.checkbox("ArXiv Audio", False)
    if tab_main == "🎤 Chat & Voice":
        await _render_chat_tab()
    elif tab_main == "📸 Media":
        await _render_media_tab()
    elif tab_main == "🔍 ArXiv":
        _render_arxiv_tab(useArxiv, useArxivAudio)
    elif tab_main == "📚 PDF to Audio":
        _render_pdf_tab()
    # 🗂️ Sidebar
    await _render_sidebar()
def main():
    """Entry point: drive the async Streamlit interface to completion."""
    # 🎉 Kick it off
    interface = async_interface()
    asyncio.run(interface)


if __name__ == "__main__":
    main()