DeepResearchEvaluator

Sleeping

App Files Files Community

awacke1 committed on Jan 19

Commit

6ef6604

verified ·

1 Parent(s): a484f78

Update app.py

Browse files

Files changed (1) hide show

app.py +648 -1690

app.py CHANGED Viewed

@@ -1,19 +1,5 @@
 import streamlit as st
-import anthropic
-import openai
-import base64
-import cv2
-import glob
-import json
-import math
-import os
-import pytz
-import random
-import re
-import requests
-import textract
-import time
-import zipfile
 import plotly.graph_objects as go
 import streamlit.components.v1 as components
 from datetime import datetime
@@ -34,9 +20,6 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
 import asyncio
 import edge_tts
 from streamlit_marquee import streamlit_marquee
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
-from typing import Dict, List, Optional, Tuple, Union
 # ─────────────────────────────────────────────────────────
 # 1. CORE CONFIGURATION & SETUP
@@ -52,7 +35,6 @@ st.set_page_config(
         'About': "🚲TalkingAIResearcher🏆"
     }
 )
 load_dotenv()
 # Available English voices for Edge TTS
@@ -68,51 +50,70 @@ EDGE_TTS_VOICES = [
     "en-CA-LiamNeural"
 ]
-# Session state initialization with default values
-DEFAULT_SESSION_STATE = {
-    'marquee_settings': {
         "background": "#1E1E1E",
         "color": "#FFFFFF",
         "font-size": "14px",
         "animationDuration": "20s",
         "width": "100%",
         "lineHeight": "35px"
-    },
-    'tts_voice': EDGE_TTS_VOICES[0],
-    'audio_format': 'mp3',
-    'transcript_history': [],
-    'chat_history': [],
-    'openai_model': "gpt-4o-2024-05-13",
-    'messages': [],
-    'last_voice_input': "",
-    'editing_file': None,
-    'edit_new_name': "",
-    'edit_new_content': "",
-    'viewing_prefix': None,
-    'should_rerun': False,
-    'old_val': None,
-    'last_query': "",
-    'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
-    'enable_audio': False,
-    'enable_download': False,
-    'enable_claude': True,
-    'audio_cache': {},
-    'paper_cache': {},
-    'download_link_cache': {},
-    'performance_metrics': defaultdict(list),
-    'operation_timings': defaultdict(float)
-}
-# Initialize session state
-for key, value in DEFAULT_SESSION_STATE.items():
-    if key not in st.session_state:
-        st.session_state[key] = value
-# API Keys and Configuration
 openai_api_key = os.getenv('OPENAI_API_KEY', "")
 anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
-xai_key = os.getenv('xai', "")
 if 'OPENAI_API_KEY' in st.secrets:
     openai_api_key = st.secrets['OPENAI_API_KEY']
 if 'ANTHROPIC_API_KEY' in st.secrets:
@@ -123,1012 +124,35 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
 HF_KEY = os.getenv('HF_KEY')
 API_URL = os.getenv('API_URL')
-# File type emojis for display
 FILE_EMOJIS = {
     "md": "📝",
     "mp3": "🎵",
     "wav": "🔊",
-    "pdf": "📄",
-    "txt": "📋",
-    "json": "📊",
-    "csv": "📈"
 }
 # ─────────────────────────────────────────────────────────
-# 2. PERFORMANCE MONITORING & TIMING
 # ─────────────────────────────────────────────────────────
-class PerformanceTimer:
-    """Context manager for timing operations with automatic logging."""
-    def __init__(self, operation_name: str):
-        self.operation_name = operation_name
-        self.start_time = None
-    def __enter__(self):
-        self.start_time = time.time()
-        return self
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if not exc_type:  # Only log if no exception occurred
-            duration = time.time() - self.start_time
-            st.session_state['operation_timings'][self.operation_name] = duration
-            st.session_state['performance_metrics'][self.operation_name].append(duration)
-def log_performance_metrics():
-    """Display performance metrics in the sidebar."""
-    st.sidebar.markdown("### ⏱️ Performance Metrics")
-    metrics = st.session_state['operation_timings']
-    if metrics:
-        total_time = sum(metrics.values())
-        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
-        # Create timing breakdown
-        for operation, duration in metrics.items():
-            percentage = (duration / total_time) * 100
-            st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")
-        # Show timing history chart
-        if st.session_state['performance_metrics']:
-            history_data = []
-            for op, times in st.session_state['performance_metrics'].items():
-                if times:  # Only show if we have timing data
-                    avg_time = sum(times) / len(times)
-                    history_data.append({"Operation": op, "Avg Time (s)": avg_time})
-            if history_data:  # Create chart if we have data
-                st.sidebar.markdown("### 📈 Timing History")
-                chart_data = pd.DataFrame(history_data)
-                st.sidebar.bar_chart(chart_data.set_index("Operation"))
-# ─────────────────────────────────────────────────────────
-# 3. OPTIMIZED AUDIO GENERATION
-# ─────────────────────────────────────────────────────────
-def clean_for_speech(text: str) -> str:
-    """Clean up text for TTS output with enhanced cleaning."""
-    with PerformanceTimer("text_cleaning"):
-        # Remove markdown formatting
-        text = re.sub(r'#+ ', '', text)  # Remove headers
-        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # Clean links
-        text = re.sub(r'[*_~`]', '', text)  # Remove emphasis markers
-        # Remove code blocks
-        text = re.sub(r'```[\s\S]*?```', '', text)
-        text = re.sub(r'`[^`]*`', '', text)
-        # Clean up whitespace
-        text = re.sub(r'\s+', ' ', text)
-        text = text.replace("\n", " ")
-        text = text.replace("</s>", " ")
-        # Remove URLs
-        text = re.sub(r'https?://\S+', '', text)
-        text = re.sub(r'\(https?://[^\)]+\)', '', text)
-        # Final cleanup
-        text = text.strip()
-        return text
-async def async_edge_tts_generate(
-    text: str,
-    voice: str,
-    rate: int = 0,
-    pitch: int = 0,
-    file_format: str = "mp3"
-) -> Tuple[Optional[str], float]:
-    """Asynchronous TTS generation with performance tracking and caching."""
-    with PerformanceTimer("tts_generation") as timer:
-        # Clean and validate text
-        text = clean_for_speech(text)
-        if not text.strip():
-            return None, 0
-        # Check cache
-        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
-        if cache_key in st.session_state['audio_cache']:
-            return st.session_state['audio_cache'][cache_key], 0
-        try:
-            # Generate audio
-            rate_str = f"{rate:+d}%"
-            pitch_str = f"{pitch:+d}Hz"
-            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
-            # Generate unique filename
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
-            # Save audio file
-            await communicate.save(filename)
-            # Cache result
-            st.session_state['audio_cache'][cache_key] = filename
-            return filename, time.time() - timer.start_time
-        except Exception as e:
-            st.error(f"Error generating audio: {str(e)}")
-            return None, 0
-async def async_save_qa_with_audio(
-    question: str,
-    answer: str,
-    voice: Optional[str] = None
-) -> Tuple[str, Optional[str], float, float]:
-    """Asynchronously save Q&A to markdown and generate audio with timing."""
-    voice = voice or st.session_state['tts_voice']
-    with PerformanceTimer("qa_save") as timer:
-        # Save markdown
-        md_start = time.time()
-        combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
-        md_file = create_file(question, answer, "md")
-        md_time = time.time() - md_start
-        # Generate audio if enabled
-        audio_file = None
-        audio_time = 0
-        if st.session_state['enable_audio']:
-            audio_text = f"{question}\n\nAnswer: {answer}"
-            audio_file, audio_time = await async_edge_tts_generate(
-                audio_text,
-                voice=voice,
-                file_format=st.session_state['audio_format']
-            )
-        return md_file, audio_file, md_time, audio_time
-def create_download_link_with_cache(
-    file_path: str,
-    file_type: str = "mp3"
-) -> str:
-    """Create download link with caching and error handling."""
-    with PerformanceTimer("download_link_generation"):
-        # Check cache first
-        cache_key = f"dl_{file_path}"
-        if cache_key in st.session_state['download_link_cache']:
-            return st.session_state['download_link_cache'][cache_key]
-        try:
-            with open(file_path, "rb") as f:
-                b64 = base64.b64encode(f.read()).decode()
-            # Generate appropriate link based on file type
-            filename = os.path.basename(file_path)
-            if file_type == "mp3":
-                link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
-            elif file_type == "wav":
-                link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
-            elif file_type == "md":
-                link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
-            else:
-                link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'
-            # Cache and return
-            st.session_state['download_link_cache'][cache_key] = link
-            return link
-        except Exception as e:
-            st.error(f"Error creating download link: {str(e)}")
-            return ""
-# ---
-def display_voice_tab():
-    """Display voice input tab with TTS settings."""
-    st.subheader("🎤 Voice Input")
-    # Voice Settings Section
-    st.markdown("### 🎤 Voice Settings")
-    captionFemale='Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
-    captionMale='Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'
-    st.sidebar.image('Group Picture - Voices.png', caption=captionFemale + ' - ' + captionMale)
-    st.sidebar.markdown("""
-    # 🎙️ Voice Character Agent Selector 🎭
-        1. Female:
-        - 🌸 **Aria** – Female: 🌟 The voice of elegance and creativity, perfect for soothing storytelling or inspiring ideas.
-        - 🎶 **Jenny** – Female: 💖 Sweet and friendly, she’s the go-to for warm, conversational tones.
-        - 🌺 **Sonia** – Female: 💃 Bold and confident, ideal for commanding attention and delivering with flair.
-        - 🌌 **Natasha** – Female: ✨ Enigmatic and sophisticated, Natasha is great for a touch of mystery and charm.
-        - 🌷 **Clara** – Female: 🎀 Cheerful and gentle, perfect for nurturing, empathetic conversations.
-        ---
-        2. Male:
-        - 🌟 **Guy** – Male: 🎩 Sophisticated and versatile, a natural fit for clear and authoritative delivery.
-        - 🛠️ **Ryan** – Male: 🤝 Down-to-earth and approachable, ideal for friendly and casual exchanges.
-        - 🎻 **William** – Male: 📚 Classic and refined, perfect for a scholarly or thoughtful tone.
-        - 🌟 **Liam** – Male: ⚡ Energetic and upbeat, great for dynamic, engaging interactions.
-    """)
-    selected_voice = st.selectbox(
-        "Select TTS Voice:",
-        options=EDGE_TTS_VOICES,
-        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
-    )
-    # Audio Format Selection
-    st.markdown("### 🔊 Audio Format")
-    selected_format = st.radio(
-        "Choose Audio Format:",
-        options=["MP3", "WAV"],
-        index=0
-    )
-    # Update session state if settings change
-    if selected_voice != st.session_state['tts_voice']:
-        st.session_state['tts_voice'] = selected_voice
-        st.rerun()
-    if selected_format.lower() != st.session_state['audio_format']:
-        st.session_state['audio_format'] = selected_format.lower()
-        st.rerun()
-    # Text Input Area
-    user_text = st.text_area("💬 Message:", height=100)
-    user_text = user_text.strip().replace('\n', ' ')
-    # Send Button
-    if st.button("📨 Send"):
-        process_voice_input(user_text)
-    # Chat History
-    st.subheader("📜 Chat History")
-    for c in st.session_state.chat_history:
-        st.write("**You:**", c["user"])
-        st.write("**Response:**", c["claude"])
-def display_arxiv_tab():
-    """Display ArXiv search tab with options."""
-    st.subheader("🔍 Query ArXiv")
-    q = st.text_input("🔍 Query:", key="arxiv_query")
-    # Options Section
-    st.markdown("### 🎛 Options")
-    col1, col2 = st.columns(2)
-    with col1:
-        vocal_summary = st.checkbox("🎙 Short Audio", value=True,
-                                  key="option_vocal_summary")
-        extended_refs = st.checkbox("📜 Long Refs", value=False,
-                                  key="option_extended_refs")
-    with col2:
-        titles_summary = st.checkbox("🔖 Titles Only", value=True,
-                                   key="option_titles_summary")
-        full_audio = st.checkbox("📚 Full Audio", value=False,
-                               key="option_full_audio")
-    full_transcript = st.checkbox("🧾 Full Transcript", value=False,
-                                key="option_full_transcript")
-    if q and st.button("🔍 Run Search"):
-        st.session_state.last_query = q
-        result, timings = perform_ai_lookup(
-            q,
-            vocal_summary=vocal_summary,
-            extended_refs=extended_refs,
-            titles_summary=titles_summary,
-            full_audio=full_audio
-        )
-        if full_transcript:
-            create_file(q, result, "md")
-def display_media_tab():
-    """Display media gallery tab with audio, images, and video."""
-    st.header("📸 Media Gallery")
-    # Create tabs for different media types
-    tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
-    # Audio Files Tab
-    with tabs[0]:
-        st.subheader("🎵 Audio Files")
-        audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
-        if audio_files:
-            for audio_file in audio_files:
-                with st.expander(os.path.basename(audio_file)):
-                    st.audio(audio_file)
-                    ext = os.path.splitext(audio_file)[1].replace('.', '')
-                    dl_link = get_download_link(audio_file, file_type=ext)
-                    st.markdown(dl_link, unsafe_allow_html=True)
-        else:
-            st.write("No audio files found.")
-    # Images Tab
-    with tabs[1]:
-        st.subheader("🖼 Image Files")
-        image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
-        if image_files:
-            cols = st.slider("Columns:", 1, 5, 3, key="cols_images")
-            image_cols = st.columns(cols)
-            for i, img_file in enumerate(image_files):
-                with image_cols[i % cols]:
-                    try:
-                        img = Image.open(img_file)
-                        st.image(img, use_column_width=True)
-                    except Exception as e:
-                        st.error(f"Error loading image {img_file}: {str(e)}")
-        else:
-            st.write("No images found.")
-    # Video Tab
-    with tabs[2]:
-        st.subheader("🎥 Video Files")
-        video_files = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
-        if video_files:
-            for video_file in video_files:
-                with st.expander(os.path.basename(video_file)):
-                    st.video(video_file)
-        else:
-            st.write("No videos found.")
-def display_editor_tab():
-    """Display text editor tab with file management."""
-    st.subheader("📝 Text Editor")
-    # File Management Section
-    st.markdown("### 📂 File Management")
-    # File Selection
-    md_files = glob.glob("*.md")
-    selected_file = st.selectbox(
-        "Select file to edit:",
-        ["New File"] + md_files,
-        key="file_selector"
-    )
-    # Edit Area
-    if selected_file == "New File":
-        new_filename = st.text_input("New filename (without extension):")
-        file_content = st.text_area("Content:", height=300)
-        if st.button("💾 Save File"):
-            if new_filename:
-                try:
-                    with open(f"{new_filename}.md", 'w', encoding='utf-8') as f:
-                        f.write(file_content)
-                    st.success(f"File {new_filename}.md saved successfully!")
-                    st.session_state.should_rerun = True
-                except Exception as e:
-                    st.error(f"Error saving file: {str(e)}")
-            else:
-                st.warning("Please enter a filename.")
-    else:
-        try:
-            # Load existing file content
-            with open(selected_file, 'r', encoding='utf-8') as f:
-                file_content = f.read()
-            # Edit existing file
-            edited_content = st.text_area(
-                "Edit content:",
-                value=file_content,
-                height=300
-            )
-            col1, col2 = st.columns(2)
-            with col1:
-                if st.button("💾 Save Changes"):
-                    try:
-                        with open(selected_file, 'w', encoding='utf-8') as f:
-                            f.write(edited_content)
-                        st.success("Changes saved successfully!")
-                    except Exception as e:
-                        st.error(f"Error saving changes: {str(e)}")
-            with col2:
-                if st.button("🗑 Delete File"):
-                    try:
-                        os.remove(selected_file)
-                        st.success(f"File {selected_file} deleted successfully!")
-                        st.session_state.should_rerun = True
-                    except Exception as e:
-                        st.error(f"Error deleting file: {str(e)}")
-        except Exception as e:
-            st.error(f"Error loading file {selected_file}: {str(e)}")
-def display_settings_tab():
-    """Display application settings tab."""
-    st.subheader("⚙️ Settings")
-    # General Settings
-    st.markdown("### 🔧 General Settings")
-    # Theme Selection
-    theme = st.selectbox(
-        "Color Theme:",
-        ["Dark", "Light", "Custom"],
-        index=0
-    )
-    if theme == "Custom":
-        st.color_picker("Primary Color:", "#1E1E1E")
-        st.color_picker("Secondary Color:", "#2D2D2D")
-    # Performance Settings
-    st.markdown("### ⚡ Performance Settings")
-    # Cache Settings
-    cache_size = st.slider(
-        "Maximum Cache Size (MB):",
-        0, 1000, 100
-    )
-    if st.button("Clear Cache"):
-        st.session_state['audio_cache'] = {}
-        st.session_state['paper_cache'] = {}
-        st.session_state['download_link_cache'] = {}
-        st.success("Cache cleared successfully!")
-    # API Settings
-    st.markdown("### 🔑 API Settings")
-    # Show/hide API keys
-    show_keys = st.checkbox("Show API Keys")
-    if show_keys:
-        st.text_input("OpenAI API Key:", value=openai_api_key)
-        st.text_input("Anthropic API Key:", value=anthropic_key)
-    # Save Settings
-    if st.button("💾 Save Settings"):
-        st.success("Settings saved successfully!")
-        st.session_state.should_rerun = True
-def get_download_link(file: str, file_type: str = "zip") -> str:
-    """
-    Convert a file to base64 and return an HTML link for download.
-    Supports multiple file types with appropriate MIME types.
-    """
-    try:
-        with open(file, "rb") as f:
-            b64 = base64.b64encode(f.read()).decode()
-        # Get filename for display
-        filename = os.path.basename(file)
-        # Define MIME types and emoji icons for different file types
-        mime_types = {
-            "zip": ("application/zip", "📂"),
-            "mp3": ("audio/mpeg", "🎵"),
-            "wav": ("audio/wav", "🔊"),
-            "md": ("text/markdown", "📝"),
-            "pdf": ("application/pdf", "📄"),
-            "txt": ("text/plain", "📋"),
-            "json": ("application/json", "📊"),
-            "csv": ("text/csv", "📈"),
-            "png": ("image/png", "🖼"),
-            "jpg": ("image/jpeg", "🖼"),
-            "jpeg": ("image/jpeg", "🖼")
-        }
-        # Get MIME type and emoji for file
-        mime_type, emoji = mime_types.get(
-            file_type.lower(),
-            ("application/octet-stream", "⬇️")
-        )
-        # Create download link with appropriate MIME type
-        link = f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{emoji} Download {filename}</a>'
-        return link
-    except FileNotFoundError:
-        return f"<p style='color: red'>❌ File not found: {file}</p>"
-    except Exception as e:
-        return f"<p style='color: red'>❌ Error creating download link: {str(e)}</p>"
-def play_and_download_audio(file_path: str, file_type: str = "mp3"):
-    """
-    Display audio player and download link for audio file.
-    Includes error handling and file validation.
-    """
-    if not file_path:
-        st.warning("No audio file provided.")
-        return
-    if not os.path.exists(file_path):
-        st.error(f"Audio file not found: {file_path}")
-        return
-    try:
-        # Display audio player
-        st.audio(file_path)
-        # Create and display download link
-        dl_link = get_download_link(file_path, file_type=file_type)
-        st.markdown(dl_link, unsafe_allow_html=True)
-    except Exception as e:
-        st.error(f"Error playing audio: {str(e)}")
-def get_file_info(file_path: str) -> dict:
-    """
-    Get detailed information about a file.
-    Returns dictionary with size, modification time, and other metadata.
-    """
-    try:
-        stats = os.stat(file_path)
-        # Get basic file information
-        info = {
-            'name': os.path.basename(file_path),
-            'path': file_path,
-            'size': stats.st_size,
-            'modified': datetime.fromtimestamp(stats.st_mtime),
-            'created': datetime.fromtimestamp(stats.st_ctime),
-            'type': os.path.splitext(file_path)[1].lower().strip('.'),
-        }
-        # Add formatted size
-        if info['size'] < 1024:
-            info['size_fmt'] = f"{info['size']} B"
-        elif info['size'] < 1024 * 1024:
-            info['size_fmt'] = f"{info['size']/1024:.1f} KB"
-        else:
-            info['size_fmt'] = f"{info['size']/(1024*1024):.1f} MB"
-        # Add formatted dates
-        info['modified_fmt'] = info['modified'].strftime("%Y-%m-%d %H:%M:%S")
-        info['created_fmt'] = info['created'].strftime("%Y-%m-%d %H:%M:%S")
-        return info
-    except Exception as e:
-        st.error(f"Error getting file info: {str(e)}")
-        return None
-def sanitize_filename(filename: str) -> str:
-    """
-    Clean and sanitize a filename to ensure it's safe for filesystem.
-    Removes/replaces unsafe characters and enforces length limits.
-    """
-    # Remove or replace unsafe characters
-    filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
-    # Remove leading/trailing spaces and dots
-    filename = filename.strip('. ')
-    # Limit length (reserving space for extension)
-    max_length = 255
-    name, ext = os.path.splitext(filename)
-    if len(filename) > max_length:
-        return name[:(max_length-len(ext))] + ext
-    return filename
-def create_file_with_metadata(filename: str, content: str, metadata: dict = None):
-    """
-    Create a file with optional metadata header.
-    Useful for storing additional information with files.
-    """
-    try:
-        # Sanitize filename
-        safe_filename = sanitize_filename(filename)
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(safe_filename) or '.', exist_ok=True)
-        # Prepare content with metadata
-        if metadata:
-            metadata_str = json.dumps(metadata, indent=2)
-            full_content = f"""---
-{metadata_str}
----
-{content}"""
-        else:
-            full_content = content
-        # Write file
-        with open(safe_filename, 'w', encoding='utf-8') as f:
-            f.write(full_content)
-        return safe_filename
-    except Exception as e:
-        st.error(f"Error creating file: {str(e)}")
-        return None
-def read_file_with_metadata(filename: str) -> tuple:
-    """
-    Read a file and extract any metadata header.
-    Returns tuple of (content, metadata).
-    """
-    try:
-        with open(filename, 'r', encoding='utf-8') as f:
-            content = f.read()
-        # Check for metadata section
-        if content.startswith('---\n'):
-            # Find end of metadata section
-            end_meta = content.find('\n---\n', 4)
-            if end_meta != -1:
-                try:
-                    metadata = json.loads(content[4:end_meta])
-                    content = content[end_meta+5:]
-                    return content, metadata
-                except json.JSONDecodeError:
-                    pass
-        return content, None
-    except Exception as e:
-        st.error(f"Error reading file: {str(e)}")
-        return None, None
-def archive_files(file_paths: list, archive_name: str = None) -> str:
-    """
-    Create a zip archive containing the specified files.
-    Returns path to created archive.
-    """
-    try:
-        # Generate archive name if not provided
-        if not archive_name:
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            archive_name = f"archive_{timestamp}.zip"
-        # Create zip file
-        with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf:
-            for file_path in file_paths:
-                if os.path.exists(file_path):
-                    zf.write(file_path, os.path.basename(file_path))
-        return archive_name
-    except Exception as e:
-        st.error(f"Error creating archive: {str(e)}")
-        return None
-def list_files_by_type(directory: str = ".",
-                      extensions: list = None,
-                      recursive: bool = False) -> dict:
-    """
-    List files in directory filtered by extension.
-    Returns dict grouping files by type.
-    """
-    try:
-        if extensions is None:
-            extensions = ['md', 'mp3', 'wav', 'pdf', 'txt', 'json', 'csv']
-        files = {}
-        pattern = "**/*" if recursive else "*"
-        for ext in extensions:
-            glob_pattern = f"{pattern}.{ext}"
-            matches = glob.glob(os.path.join(directory, glob_pattern),
-                              recursive=recursive)
-            if matches:
-                files[ext] = matches
-        return files
-    except Exception as e:
-        st.error(f"Error listing files: {str(e)}")
-        return {}
-def get_central_time() -> datetime:
     """Get current time in US Central timezone."""
     central = pytz.timezone('US/Central')
     return datetime.now(central)
-def format_timestamp_prefix() -> str:
     """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM."""
     ct = get_central_time()
     return ct.strftime("%m_%d_%y_%I_%M_%p")
-def get_formatted_time(dt: datetime = None,
-                      timezone: str = 'US/Central',
-                      include_timezone: bool = True,
-                      include_seconds: bool = False) -> str:
-    """
-    Format a datetime object with specified options.
-    If no datetime is provided, uses current time.
-    """
-    if dt is None:
-        tz = pytz.timezone(timezone)
-        dt = datetime.now(tz)
-    elif dt.tzinfo is None:
-        tz = pytz.timezone(timezone)
-        dt = tz.localize(dt)
-    format_string = "%Y-%m-%d %I:%M"
-    if include_seconds:
-        format_string += ":%S"
-    format_string += " %p"
-    if include_timezone:
-        format_string += " %Z"
-    return dt.strftime(format_string)
-def parse_timestamp(timestamp_str: str,
-                   timezone: str = 'US/Central') -> Optional[datetime]:
-    """
-    Parse a timestamp string in various formats.
-    Returns timezone-aware datetime object.
-    """
-    try:
-        # Try different format patterns
-        patterns = [
-            "%m_%d_%y_%I_%M_%p",  # Standard app format
-            "%Y-%m-%d %I:%M %p",   # Common 12-hour format
-            "%Y-%m-%d %H:%M",      # 24-hour format
-            "%m/%d/%y %I:%M %p",   # US date format
-            "%d/%m/%y %I:%M %p"    # European date format
-        ]
-        dt = None
-        for pattern in patterns:
-            try:
-                dt = datetime.strptime(timestamp_str, pattern)
-                break
-            except ValueError:
-                continue
-        if dt is None:
-            raise ValueError(f"Could not parse timestamp: {timestamp_str}")
-        # Add timezone if not present
-        if dt.tzinfo is None:
-            tz = pytz.timezone(timezone)
-            dt = tz.localize(dt)
-        return dt
-    except Exception as e:
-        st.error(f"Error parsing timestamp: {str(e)}")
-        return None
-def get_time_ago(dt: datetime) -> str:
-    """
-    Convert datetime to human-readable "time ago" format.
-    E.g., "2 hours ago", "3 days ago", etc.
-    """
-    try:
-        now = datetime.now(dt.tzinfo)
-        diff = now - dt
-        seconds = diff.total_seconds()
-        if seconds < 60:
-            return "just now"
-        elif seconds < 3600:
-            minutes = int(seconds / 60)
-            return f"{minutes} minute{'s' if minutes != 1 else ''} ago"
-        elif seconds < 86400:
-            hours = int(seconds / 3600)
-            return f"{hours} hour{'s' if hours != 1 else ''} ago"
-        elif seconds < 604800:
-            days = int(seconds / 86400)
-            return f"{days} day{'s' if days != 1 else ''} ago"
-        elif seconds < 2592000:
-            weeks = int(seconds / 604800)
-            return f"{weeks} week{'s' if weeks != 1 else ''} ago"
-        elif seconds < 31536000:
-            months = int(seconds / 2592000)
-            return f"{months} month{'s' if months != 1 else ''} ago"
-        else:
-            years = int(seconds / 31536000)
-            return f"{years} year{'s' if years != 1 else ''} ago"
-    except Exception as e:
-        st.error(f"Error calculating time ago: {str(e)}")
-        return "unknown time ago"
-def format_duration(seconds: float) -> str:
-    """
-    Format a duration in seconds to human-readable string.
-    E.g., "2m 30s", "1h 15m", etc.
-    """
-    try:
-        if seconds < 0:
-            return "invalid duration"
-        # Handle special cases
-        if seconds < 1:
-            return f"{seconds * 1000:.0f}ms"
-        if seconds < 60:
-            return f"{seconds:.1f}s"
-        # Calculate hours, minutes, seconds
-        hours = int(seconds // 3600)
-        minutes = int((seconds % 3600) // 60)
-        secs = seconds % 60
-        # Build duration string
-        parts = []
-        if hours > 0:
-            parts.append(f"{hours}h")
-        if minutes > 0:
-            parts.append(f"{minutes}m")
-        if secs > 0 and hours == 0:  # Only show seconds if less than an hour
-            parts.append(f"{secs:.1f}s")
-        return " ".join(parts)
-    except Exception as e:
-        st.error(f"Error formatting duration: {str(e)}")
-        return "unknown duration"
-async def create_paper_audio_files(papers: List[Dict], input_question: str):
-    """Generate audio files for papers asynchronously with improved naming."""
-    with PerformanceTimer("paper_audio_generation"):
-        tasks = []
-        for paper in papers:
-            try:
-                # Prepare text for audio generation
-                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
-                audio_text = clean_for_speech(audio_text)
-                # Create sanitized title for filename
-                safe_title = paper['title'].lower()
-                safe_title = re.sub(r'[^\w\s-]', '', safe_title)  # Remove special chars
-                safe_title = re.sub(r'\s+', '_', safe_title)      # Replace spaces with underscores
-                safe_title = safe_title[:100]                     # Limit length
-                # Generate timestamp
-                timestamp = format_timestamp_prefix()
-                # Create filename with timestamp and title
-                filename = f"{timestamp}_{safe_title}.{st.session_state['audio_format']}"
-                # Create task for audio generation
-                async def generate_audio(text, filename):
-                    rate_str = "0%"
-                    pitch_str = "0Hz"
-                    communicate = edge_tts.Communicate(text, st.session_state['tts_voice'])
-                    await communicate.save(filename)
-                    return filename
-                task = generate_audio(audio_text, filename)
-                tasks.append((paper, task, filename))
-            except Exception as e:
-                st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}")
-                continue
-        # Process all audio generation tasks concurrently
-        for paper, task, filename in tasks:
-            try:
-                audio_file = await task
-                if audio_file:
-                    paper['full_audio'] = audio_file
-                    if st.session_state['enable_download']:
-                        paper['download_base64'] = create_download_link_with_cache(
-                            audio_file,
-                            st.session_state['audio_format']
-                        )
-            except Exception as e:
-                st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
-                paper['full_audio'] = None
-                paper['download_base64'] = ''
-# ─────────────────────────────────────────────────────────
-# 4. PAPER PROCESSING & DISPLAY
-# ─────────────────────────────────────────────────────────
-def parse_arxiv_refs(ref_text: str) -> List[Dict[str, str]]:
-    """Parse arxiv references with improved error handling."""
-    if not ref_text:
-        return []
-    with PerformanceTimer("parse_refs"):
-        results = []
-        current_paper = {}
-        lines = ref_text.split('\n')
-        for i, line in enumerate(lines):
-            try:
-                if line.count('|') == 2:
-                    # Found a new paper line
-                    if current_paper:
-                        results.append(current_paper)
-                        if len(results) >= 20:  # Limit to 20 papers
-                            break
-                    # Parse header parts
-                    header_parts = line.strip('* ').split('|')
-                    date = header_parts[0].strip()
-                    title = header_parts[1].strip()
-                    url_match = re.search(r'(https://arxiv.org/\S+)', line)
-                    url = url_match.group(1) if url_match else f"paper_{len(results)}"
-                    current_paper = {
-                        'date': date,
-                        'title': title,
-                        'url': url,
-                        'authors': '',
-                        'summary': '',
-                        'full_audio': None,
-                        'download_base64': '',
-                    }
-                elif current_paper:
-                    # Add content to current paper
-                    line = line.strip('* ')
-                    if not current_paper['authors']:
-                        current_paper['authors'] = line
-                    else:
-                        if current_paper['summary']:
-                            current_paper['summary'] += ' ' + line
-                        else:
-                            current_paper['summary'] = line
-            except Exception as e:
-                st.warning(f"Error parsing line {i}: {str(e)}")
-                continue
-        # Add final paper if exists
-        if current_paper:
-            results.append(current_paper)
-        return results[:20]  # Ensure we don't exceed 20 papers
-async def create_paper_audio_files(papers: List[Dict], input_question: str):
-    """Generate audio files for papers asynchronously with progress tracking."""
-    with PerformanceTimer("paper_audio_generation"):
-        tasks = []
-        for paper in papers:
-            try:
-                # Prepare text for audio generation
-                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
-                audio_text = clean_for_speech(audio_text)
-                # Create task for audio generation
-                task = async_edge_tts_generate(
-                    audio_text,
-                    voice=st.session_state['tts_voice'],
-                    file_format=st.session_state['audio_format']
-                )
-                tasks.append((paper, task))
-            except Exception as e:
-                st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}")
-                continue
-        # Process all audio generation tasks concurrently
-        for paper, task in tasks:
-            try:
-                audio_file, gen_time = await task
-                if audio_file:
-                    paper['full_audio'] = audio_file
-                    if st.session_state['enable_download']:
-                        paper['download_base64'] = create_download_link_with_cache(
-                            audio_file,
-                            st.session_state['audio_format']
-                        )
-            except Exception as e:
-                st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
-                paper['full_audio'] = None
-                paper['download_base64'] = ''
 def initialize_marquee_settings():
-    """Initialize default marquee settings if not present in session state."""
     if 'marquee_settings' not in st.session_state:
         st.session_state['marquee_settings'] = {
             "background": "#1E1E1E",
@@ -1140,50 +164,24 @@ def initialize_marquee_settings():
         }
 def get_marquee_settings():
-    """Get current marquee settings, initializing if needed."""
     initialize_marquee_settings()
     return st.session_state['marquee_settings']
 def update_marquee_settings_ui():
-    """Add color pickers & sliders for marquee configuration in sidebar."""
     st.sidebar.markdown("### 🎯 Marquee Settings")
-    # Create two columns for settings
     cols = st.sidebar.columns(2)
-    # Column 1: Color settings
     with cols[0]:
-        # Background color picker
-        bg_color = st.color_picker(
-            "🎨 Background",
-            st.session_state['marquee_settings']["background"],
-            key="bg_color_picker"
-        )
-        # Text color picker
-        text_color = st.color_picker(
-            "✍️ Text Color",
-            st.session_state['marquee_settings']["color"],
-            key="text_color_picker"
-        )
-    # Column 2: Size and speed settings
     with cols[1]:
-        # Font size slider
-        font_size = st.slider(
-            "📏 Font Size",
-            10, 24, 14,
-            key="font_size_slider"
-        )
-        # Animation duration slider
-        duration = st.slider(
-            "⏱️ Animation Speed",
-            1, 20, 20,
-            key="duration_slider"
-        )
-    # Update session state with new settings
     st.session_state['marquee_settings'].update({
         "background": bg_color,
         "color": text_color,
@@ -1191,254 +189,51 @@ def update_marquee_settings_ui():
         "animationDuration": f"{duration}s"
     })
-def display_marquee(text: str, settings: dict, key_suffix: str = ""):
-    """Show marquee text with specified style settings."""
-    # Truncate long text to prevent performance issues
     truncated_text = text[:280] + "..." if len(text) > 280 else text
-    # Display the marquee
     streamlit_marquee(
         content=truncated_text,
         **settings,
         key=f"marquee_{key_suffix}"
     )
-    # Add spacing after marquee
     st.write("")
-def create_paper_links_md(papers: list) -> str:
-    """Creates a minimal markdown file linking to each paper's arxiv URL."""
-    lines = ["# Paper Links\n"]
-    for i, p in enumerate(papers, start=1):
-        lines.append(f"{i}. **{p['title']}** — [Arxiv]({p['url']})")
-    return "\n".join(lines)
-def apply_custom_styling():
-    """Apply custom CSS styling to the app."""
-    st.markdown("""
-        <style>
-            .main {
-                background: linear-gradient(to right, #1a1a1a, #2d2d2d);
-                color: #fff;
-            }
-            .stMarkdown {
-                font-family: 'Helvetica Neue', sans-serif;
-            }
-            .stButton>button {
-                margin-right: 0.5rem;
-            }
-            .streamlit-marquee {
-                margin: 1rem 0;
-                border-radius: 4px;
-            }
-            .st-emotion-cache-1y4p8pa {
-                padding: 1rem;
-            }
-        </style>
-    """, unsafe_allow_html=True)
-def display_performance_metrics(timings: dict):
-    """Display performance metrics with visualizations."""
-    st.sidebar.markdown("### ⏱️ Performance Metrics")
-    # Calculate total time
-    total_time = sum(timings.values())
-    st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
-    # Show breakdown of operations
-    st.sidebar.markdown("#### Operation Breakdown")
-    for operation, duration in timings.items():
-        percentage = (duration / total_time) * 100 if total_time > 0 else 0
-        st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")
-        # Create a progress bar for visual representation
-        st.sidebar.progress(percentage / 100)
-def display_papers(papers: List[Dict], marquee_settings: Dict):
-    """Display paper information with enhanced visualization."""
-    with PerformanceTimer("paper_display"):
-        st.write("## 📚 Research Papers")
-        # Create tabs for different views
-        tab1, tab2 = st.tabs(["📋 List View", "📊 Grid View"])
-        with tab1:
-            for i, paper in enumerate(papers, start=1):
-                # Create marquee for paper title
-                marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]}"
-                display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
-                # Paper details expander
-                with st.expander(f"{i}. 📄 {paper['title']}", expanded=True):
-                    # Create PDF link
-                    pdf_url = paper['url'].replace('/abs/', '/pdf/')
-                    # Display paper information
-                    st.markdown(f"""
-                        **Date:** {paper['date']}
-                        **Title:** {paper['title']}
-                        **Links:** 📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url})
-                    """)
-                    st.markdown(f"**Authors:** {paper['authors']}")
-                    st.markdown(f"**Summary:** {paper['summary']}")
-                    # Audio player and download if available
-                    if paper.get('full_audio'):
-                        st.write("🎧 Paper Audio Summary")
-                        st.audio(paper['full_audio'])
-                        if paper['download_base64']:
-                            st.markdown(paper['download_base64'], unsafe_allow_html=True)
-        with tab2:
-            # Create a grid layout of papers
-            cols = st.columns(3)
-            for i, paper in enumerate(papers):
-                with cols[i % 3]:
-                    st.markdown(f"""
-                        ### 📄 {paper['title'][:50]}...
-                        **Date:** {paper['date']}
-                        [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')})
-                    """)
-                    if paper.get('full_audio'):
-                        st.audio(paper['full_audio'])
-def display_papers_in_sidebar(papers: List[Dict]):
-    """Display paper listing in sidebar with lazy loading."""
-    with PerformanceTimer("sidebar_display"):
-        st.sidebar.title("📚 Papers Overview")
-        # Add filter options
-        filter_date = st.sidebar.date_input("Filter by date:", None)
-        search_term = st.sidebar.text_input("Search papers:", "")
-        # Filter papers based on criteria
-        filtered_papers = papers
-        if filter_date:
-            filtered_papers = [p for p in filtered_papers
-                             if filter_date.strftime("%Y-%m-%d") in p['date']]
-        if search_term:
-            search_lower = search_term.lower()
-            filtered_papers = [p for p in filtered_papers
-                             if search_lower in p['title'].lower()
-                             or search_lower in p['authors'].lower()]
-        # Display filtered papers
-        for i, paper in enumerate(filtered_papers, start=1):
-            paper_key = f"paper_{paper['url']}"
-            if paper_key not in st.session_state:
-                st.session_state[paper_key] = False
-            with st.sidebar.expander(f"{i}. {paper['title'][:50]}...", expanded=False):
-                # Paper metadata
-                st.markdown(f"**Date:** {paper['date']}")
-                # Links
-                pdf_url = paper['url'].replace('/abs/', '/pdf/')
-                st.markdown(f"📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url})")
-                # Preview of authors and summary
-                st.markdown(f"**Authors:** {paper['authors'][:100]}...")
-                if paper['summary']:
-                    st.markdown(f"**Summary:** {paper['summary'][:200]}...")
-                # Audio controls
-                if paper['full_audio']:
-                    if st.button("🎵 Load Audio", key=f"btn_{paper_key}"):
-                        st.session_state[paper_key] = True
-                    if st.session_state[paper_key]:
-                        st.audio(paper['full_audio'])
-                        if paper['download_base64']:
-                            st.markdown(paper['download_base64'], unsafe_allow_html=True)
-# ─────────────────────────────────────────────────────────
-# 5. FILE MANAGEMENT & HISTORY
-# ─────────────────────────────────────────────────────────
-def create_file(prompt: str, response: str, file_type: str = "md") -> str:
-    """Create a file with proper naming and error handling."""
-    with PerformanceTimer("file_creation"):
-        try:
-            # Generate filename
-            filename = generate_filename(prompt.strip(), response.strip(), file_type)
-            # Ensure directory exists
-            os.makedirs("generated_files", exist_ok=True)
-            filepath = os.path.join("generated_files", filename)
-            # Write content
-            with open(filepath, 'w', encoding='utf-8') as f:
-                if file_type == "md":
-                    f.write(f"# Query\n{prompt}\n\n# Response\n{response}")
-                else:
-                    f.write(f"{prompt}\n\n{response}")
-            return filepath
-        except Exception as e:
-            st.error(f"Error creating file: {str(e)}")
-            return ""
-def get_high_info_terms(text: str, top_n: int = 10) -> List[str]:
-    """Extract most informative terms from text."""
-    # Common English stop words to filter out
-    stop_words = set([
-        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
-        'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over',
-        'after', 'the', 'this', 'that', 'these', 'those', 'what', 'which'
-    ])
-    # Extract words and bi-grams
     words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
     bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
-    # Combine and filter terms
     combined = words + bi_grams
-    filtered = [term for term in combined
-               if term not in stop_words
-               and len(term.split()) <= 2
-               and len(term) > 3]
-    # Count and return top terms
     counter = Counter(filtered)
     return [term for term, freq in counter.most_common(top_n)]
 def clean_text_for_filename(text: str) -> str:
-    """Clean text for use in filenames."""
-    # Remove special characters
     text = text.lower()
     text = re.sub(r'[^\w\s-]', '', text)
-    # Remove common unhelpful words
-    stop_words = set([
-        'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which',
-        'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai',
-        'library', 'function', 'method', 'class', 'object', 'variable'
-    ])
     words = text.split()
-    filtered = [w for w in words if len(w) > 3 and w not in stop_words]
     return '_'.join(filtered)[:200]
-def generate_filename(prompt: str, response: str, file_type: str = "md",
-                     max_length: int = 200) -> str:
-    """Generate descriptive filename from content."""
-    # Get timestamp prefix
     prefix = format_timestamp_prefix() + "_"
-    # Extract informative terms
-    combined_text = (prompt + " " + response)[:500]
-    info_terms = get_high_info_terms(combined_text, top_n=5)
-    # Get content snippet
     snippet = (prompt[:40] + " " + response[:40]).strip()
     snippet_cleaned = clean_text_for_filename(snippet)
-    # Combine and deduplicate parts
     name_parts = info_terms + [snippet_cleaned]
     seen = set()
     unique_parts = []
@@ -1447,7 +242,6 @@ def generate_filename(prompt: str, response: str, file_type: str = "md",
             seen.add(part)
             unique_parts.append(part)
-    # Create final filename
     full_name = '_'.join(unique_parts).strip('_')
     leftover_chars = max_length - len(prefix) - len(file_type) - 1
     if len(full_name) > leftover_chars:
@@ -1455,415 +249,579 @@ def generate_filename(prompt: str, response: str, file_type: str = "md",
     return f"{prefix}{full_name}.{file_type}"
-def create_zip_of_files(md_files: List[str], mp3_files: List[str],
-                       wav_files: List[str], input_question: str) -> Optional[str]:
-    """Create zip archive of files with optimization."""
-    with PerformanceTimer("zip_creation"):
-        # Filter out readme and empty files
-        md_files = [f for f in md_files
-                   if os.path.basename(f).lower() != 'readme.md'
-                   and os.path.getsize(f) > 0]
-        all_files = md_files + mp3_files + wav_files
-        if not all_files:
-            return None
-        try:
-            # Generate zip name
-            all_content = []
-            for f in all_files:
-                if f.endswith('.md'):
-                    with open(f, 'r', encoding='utf-8') as file:
-                        all_content.append(file.read())
-                elif f.endswith(('.mp3', '.wav')):
-                    basename = os.path.splitext(os.path.basename(f))[0]
-                    all_content.append(basename.replace('_', ' '))
-            all_content.append(input_question)
-            combined_content = " ".join(all_content)
-            info_terms = get_high_info_terms(combined_content, top_n=10)
-            timestamp = format_timestamp_prefix()
-            name_text = '-'.join(term for term in info_terms[:5])
-            zip_name = f"archive_{timestamp}_{name_text[:50]}.zip"
-            # Create zip file
-            with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as z:
-                for f in all_files:
-                    z.write(f, os.path.basename(f))
-            return zip_name
-        except Exception as e:
-            st.error(f"Error creating zip archive: {str(e)}")
-            return None
 # ─────────────────────────────────────────────────────────
-# 6. OPTIMIZED AI LOOKUP & PROCESSING
 # ─────────────────────────────────────────────────────────
-def perform_ai_lookup(q: str, vocal_summary: bool = True,
-                     extended_refs: bool = False,
-                     titles_summary: bool = True,
-                     full_audio: bool = False) -> Tuple[str, Dict[str, float]]:
-    """Main AI lookup routine with performance optimization."""
-    with PerformanceTimer("total_lookup") as total_timer:
-        timings = {}
-        # Add operation controls if not present
-        if 'operation_controls' not in st.session_state:
-            st.sidebar.markdown("### 🔧 Operation Controls")
-            st.session_state['enable_claude'] = st.sidebar.checkbox(
-                "Enable Claude Search",
-                value=st.session_state['enable_claude']
-            )
-            st.session_state['enable_audio'] = st.sidebar.checkbox(
-                "Generate Audio",
-                value=st.session_state['enable_audio']
-            )
-            st.session_state['enable_download'] = st.sidebar.checkbox(
-                "Create Download Links",
-                value=st.session_state['enable_download']
-            )
-            st.session_state['operation_controls'] = True
-        result = ""
-        # 1. Claude API (if enabled)
-        if st.session_state['enable_claude']:
-            with PerformanceTimer("claude_api") as claude_timer:
-                try:
-                    client = anthropic.Anthropic(api_key=anthropic_key)
-                    response = client.messages.create(
-                        model="claude-3-sonnet-20240229",
-                        max_tokens=1000,
-                        messages=[{"role": "user", "content": q}]
-                    )
-                    st.write("Claude's reply 🧠:")
-                    st.markdown(response.content[0].text)
-                    result = response.content[0].text
-                    timings['claude_api'] = time.time() - claude_timer.start_time
-                except Exception as e:
-                    st.error(f"Error with Claude API: {str(e)}")
-                    result = "Error occurred during Claude API call"
-                    timings['claude_api'] = 0
-        # 2. Async save and audio generation
-        async def process_results():
-            with PerformanceTimer("results_processing") as proc_timer:
-                md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(
-                    q, result
-                )
-                timings['markdown_save'] = md_time
-                timings['audio_generation'] = audio_time
-                if audio_file and st.session_state['enable_audio']:
-                    st.subheader("📝 Main Response Audio")
-                    st.audio(audio_file)
-                    if st.session_state['enable_download']:
-                        st.markdown(
-                            create_download_link_with_cache(
-                                audio_file,
-                                st.session_state['audio_format']
-                            ),
-                            unsafe_allow_html=True
-                        )
-        # Run async operations
-        asyncio.run(process_results())
-        # 3. Arxiv RAG with performance tracking
-        if st.session_state['enable_claude']:
-            with PerformanceTimer("arxiv_rag") as rag_timer:
-                try:
-                    st.write('Running Arxiv RAG with Claude inputs.')
-                    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-                    refs = client.predict(
-                        q,
-                        10,
-                        "Semantic Search",
-                        "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                        api_name="/update_with_rag_md"
-                    )[0]
-                    timings['arxiv_rag'] = time.time() - rag_timer.start_time
-                    # Process papers asynchronously
-                    papers = parse_arxiv_refs(refs)
-                    if papers:
-                        with PerformanceTimer("paper_processing") as paper_timer:
-                            async def process_papers():
-                                # Create minimal links page
-                                paper_links = create_paper_links_md(papers)
-                                links_file = create_file(q, paper_links, "md")
-                                st.markdown(paper_links)
-                                # Generate audio and display papers
-                                await create_paper_audio_files(papers, q)
-                                display_papers(papers, get_marquee_settings())
-                                display_papers_in_sidebar(papers)
-                            asyncio.run(process_papers())
-                            timings['paper_processing'] = time.time() - paper_timer.start_time
-                    else:
-                        st.warning("No papers found in the response.")
-                except Exception as e:
-                    st.error(f"Error during Arxiv RAG: {str(e)}")
-                    timings['arxiv_rag'] = 0
-        return result, timings
-def process_voice_input(text: str):
-    """Process voice input with enhanced error handling and feedback."""
-    if not text:
-        st.warning("Please provide some input text.")
-        return
-    with PerformanceTimer("voice_processing"):
         try:
-            st.subheader("🔍 Search Results")
-            result, timings = perform_ai_lookup(
-                text,
-                vocal_summary=True,
-                extended_refs=False,
-                titles_summary=True,
-                full_audio=True
             )
-            # Save results
-            md_file, audio_file = save_qa_with_audio(text, result)
-            # Display results
-            st.subheader("📝 Generated Files")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.write(f"📄 Markdown: {os.path.basename(md_file)}")
-                st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True)
-            with col2:
-                if audio_file:
-                    st.write(f"🎵 Audio: {os.path.basename(audio_file)}")
-                    play_and_download_audio(
-                        audio_file,
-                        st.session_state['audio_format']
-                    )
         except Exception as e:
-            st.error(f"Error processing voice input: {str(e)}")
 # ─────────────────────────────────────────────────────────
-# 7. SIDEBAR AND FILE HISTORY
 # ─────────────────────────────────────────────────────────
-def display_file_history_in_sidebar():
-    """Display file history with enhanced organization and filtering."""
-    with PerformanceTimer("file_history"):
-        st.sidebar.markdown("---")
-        st.sidebar.markdown("### 📂 File History")
-        # Gather all files
-        md_files = glob.glob("*.md")
-        mp3_files = glob.glob("*.mp3")
-        wav_files = glob.glob("*.wav")
-        all_files = md_files + mp3_files + wav_files
-        if not all_files:
-            st.sidebar.write("No files found.")
-            return
-        # Add file management controls
-        col1, col2 = st.sidebar.columns(2)
-        with col1:
-            if st.button("🗑 Delete All"):
-                try:
-                    for f in all_files:
-                        os.remove(f)
-                    st.session_state.should_rerun = True
-                    st.success("All files deleted successfully.")
-                except Exception as e:
-                    st.error(f"Error deleting files: {str(e)}")
-        with col2:
-            if st.button("⬇️ Zip All"):
-                zip_name = create_zip_of_files(
-                    md_files,
-                    mp3_files,
-                    wav_files,
-                    st.session_state.get('last_query', '')
-                )
-                if zip_name:
-                    st.sidebar.markdown(
-                        get_download_link(zip_name, "zip"),
-                        unsafe_allow_html=True
-                    )
-        # Add file filtering options
-        st.sidebar.markdown("### 🔍 Filter Files")
-        file_search = st.sidebar.text_input("Search files:", "")
-        file_type_filter = st.sidebar.multiselect(
-            "File types:",
-            ["Markdown", "Audio"],
-            default=["Markdown", "Audio"]
-        )
-        # Sort files by modification time
-        all_files.sort(key=os.path.getmtime, reverse=True)
-        # Filter files based on search and type
-        filtered_files = []
         for f in all_files:
-            if file_search.lower() in f.lower():
-                ext = os.path.splitext(f)[1].lower()
-                if (("Markdown" in file_type_filter and ext == ".md") or
-                    ("Audio" in file_type_filter and ext in [".mp3", ".wav"])):
-                    filtered_files.append(f)
-        # Display filtered files
-        for f in filtered_files:
-            fname = os.path.basename(f)
-            ext = os.path.splitext(fname)[1].lower().strip('.')
-            emoji = FILE_EMOJIS.get(ext, '📦')
-            # Get file metadata
-            mod_time = datetime.fromtimestamp(os.path.getmtime(f))
-            time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S")
-            file_size = os.path.getsize(f) / 1024  # Size in KB
-            with st.sidebar.expander(f"{emoji} {fname}"):
-                st.write(f"**Modified:** {time_str}")
-                st.write(f"**Size:** {file_size:.1f} KB")
-                if ext == "md":
-                    try:
-                        with open(f, "r", encoding="utf-8") as file_in:
-                            snippet = file_in.read(200).replace("\n", " ")
-                        if len(snippet) == 200:
-                            snippet += "..."
-                        st.write(snippet)
-                        st.markdown(
-                            get_download_link(f, file_type="md"),
-                            unsafe_allow_html=True
-                        )
-                    except Exception as e:
-                        st.error(f"Error reading markdown file: {str(e)}")
-                elif ext in ["mp3", "wav"]:
-                    st.audio(f)
-                    st.markdown(
-                        get_download_link(f, file_type=ext),
-                        unsafe_allow_html=True
-                    )
-                else:
-                    st.markdown(get_download_link(f), unsafe_allow_html=True)
 # ─────────────────────────────────────────────────────────
-# 8. MAIN APPLICATION
 # ─────────────────────────────────────────────────────────
 def main():
-    """Main application entry point with enhanced UI and error handling."""
-    try:
-        # 1. Setup marquee UI in sidebar
-        update_marquee_settings_ui()
-        marquee_settings = get_marquee_settings()
-        # 2. Display welcome marquee
-        display_marquee(
-            st.session_state['marquee_content'],
-            {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
-            key_suffix="welcome"
-        )
-        # 3. Main action tabs
-        tab_main = st.radio(
-            "Action:",
-            ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
-            horizontal=True
-        )
-        # Custom component usage
-        mycomponent = components.declare_component(
-            "mycomponent",
-            path="mycomponent"
-        )
-        val = mycomponent(my_input_value="Hello")
-        if val:
-            # Process input value
-            val_stripped = val.replace('\\n', ' ')
-            edited_input = st.text_area(
-                "✏️ Edit Input:",
-                value=val_stripped,
-                height=100
-            )
-            # Model selection and options
-            run_option = st.selectbox("Model:", ["Arxiv"])
-            col1, col2 = st.columns(2)
-            with col1:
-                #autorun = st.checkbox("⚙ AutoRun", value=True)
-                autorun = st.checkbox("⚙ AutoRun", value=False)
-            with col2:
-                full_audio = st.checkbox("📚 FullAudio", value=False)
-            # Check for input changes
-            input_changed = (val != st.session_state.old_val)
-            if autorun and input_changed:
-                st.session_state.old_val = val
-                st.session_state.last_query = edited_input
-                result, timings = perform_ai_lookup(
-                    edited_input,
-                    vocal_summary=True,
-                    extended_refs=False,
-                    titles_summary=True,
-                    full_audio=full_audio
-                )
-                # Display performance metrics
-                display_performance_metrics(timings)
             else:
-                if st.button("▶ Run"):
-                    st.session_state.old_val = val
-                    st.session_state.last_query = edited_input
-                    result, timings = perform_ai_lookup(
-                        edited_input,
-                        vocal_summary=True,
-                        extended_refs=False,
-                        titles_summary=True,
-                        full_audio=full_audio
-                    )
-                    # Display performance metrics
-                    display_performance_metrics(timings)
-        # Tab-specific content
-        if tab_main == "🔍 ArXiv":
-            display_arxiv_tab()
-        elif tab_main == "🎤 Voice":
-            display_voice_tab()
-        elif tab_main == "📸 Media":
-            display_media_tab()
-        elif tab_main == "📝 Editor":
-            display_editor_tab()
-        # Display file history
-        display_file_history_in_sidebar()
-        # Apply styling
-        apply_custom_styling()
-        # Check for rerun
-        if st.session_state.should_rerun:
-            st.session_state.should_rerun = False
-            st.rerun()
-    except Exception as e:
-        st.error(f"An error occurred in the main application: {str(e)}")
-        st.info("Please try refreshing the page or contact support if the issue persists.")
 if __name__ == "__main__":
-    main()

 import streamlit as st
+import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
 import plotly.graph_objects as go
 import streamlit.components.v1 as components
 from datetime import datetime
 import asyncio
 import edge_tts
 from streamlit_marquee import streamlit_marquee
 # ─────────────────────────────────────────────────────────
 # 1. CORE CONFIGURATION & SETUP
         'About': "🚲TalkingAIResearcher🏆"
     }
 )
 load_dotenv()
 # Available English voices for Edge TTS
     "en-CA-LiamNeural"
 ]
+# Session state variables: every key below is seeded only when it is not
+# already present, so values set during earlier script runs are kept.
+_SESSION_DEFAULTS = {
+    'marquee_settings': {
+        "background": "#1E1E1E",
+        "color": "#FFFFFF",
+        "font-size": "14px",
+        "animationDuration": "20s",
+        "width": "100%",
+        "lineHeight": "35px"
+    },
+    'tts_voice': EDGE_TTS_VOICES[0],
+    'audio_format': 'mp3',
+    'transcript_history': [],
+    'chat_history': [],
+    'openai_model': "gpt-4o-2024-05-13",
+    'messages': [],
+    'last_voice_input': "",
+    'editing_file': None,
+    'edit_new_name': "",
+    'edit_new_content': "",
+    'viewing_prefix': None,
+    'should_rerun': False,
+    'old_val': None,
+    'last_query': "",
+    'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
+    # New: default AutoRun to False (off)
+    'autorun': False,
+}
+for _state_key, _state_default in _SESSION_DEFAULTS.items():
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _state_default
+# API Keys
 openai_api_key = os.getenv('OPENAI_API_KEY', "")
 anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
+xai_key = os.getenv('xai',"")
 if 'OPENAI_API_KEY' in st.secrets:
     openai_api_key = st.secrets['OPENAI_API_KEY']
 if 'ANTHROPIC_API_KEY' in st.secrets:
 HF_KEY = os.getenv('HF_KEY')
 API_URL = os.getenv('API_URL')
+# Helper constants
+# FILE_EMOJIS maps a lowercase file extension (without the leading dot) to
+# the emoji shown next to that file in the sidebar file history. Extensions
+# that are not listed fall back to a default emoji at the lookup site.
 FILE_EMOJIS = {
     "md": "📝",
     "mp3": "🎵",
     "wav": "🔊",
+    "pdf": "📕",
+    "mp4": "🎥",
+    "csv": "📈",
+    "xlsx": "📊",
+    "html": "🌐",
+    "py": "🐍",
+    "txt": "📄"
 }
 # ─────────────────────────────────────────────────────────
+# 2. HELPER FUNCTIONS
 # ─────────────────────────────────────────────────────────
+def get_central_time():
+    """Return the current timezone-aware datetime for the US/Central zone."""
+    return datetime.now(pytz.timezone('US/Central'))
+def format_timestamp_prefix():
+    """Return the current Central time formatted as MM_dd_yy_hh_mm_AM/PM."""
+    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
 def initialize_marquee_settings():
     if 'marquee_settings' not in st.session_state:
         st.session_state['marquee_settings'] = {
             "background": "#1E1E1E",
         }
 def get_marquee_settings():
+    """Return the marquee settings dict stored in Streamlit session state.
+
+    Calls ``initialize_marquee_settings()`` first so that the
+    ``'marquee_settings'`` key exists before it is read.
+    """
     initialize_marquee_settings()
     return st.session_state['marquee_settings']
 def update_marquee_settings_ui():
+    """Add color pickers & sliders for marquee config in sidebar."""
     st.sidebar.markdown("### 🎯 Marquee Settings")
     cols = st.sidebar.columns(2)
     with cols[0]:
+        bg_color = st.color_picker("🎨 Background",
+                                  st.session_state['marquee_settings']["background"],
+                                  key="bg_color_picker")
+        text_color = st.color_picker("✍️ Text",
+                                    st.session_state['marquee_settings']["color"],
+                                    key="text_color_picker")
     with cols[1]:
+        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
+        duration = st.slider("⏱️ Speed", 1, 20, 20, key="duration_slider")
     st.session_state['marquee_settings'].update({
         "background": bg_color,
         "color": text_color,
         "animationDuration": f"{duration}s"
     })
+def display_marquee(text, settings, key_suffix=""):
+    """Render `text` as a marquee styled by the `settings` mapping.
+
+    Text longer than 280 characters is cut to 280 and suffixed with "...".
+    `key_suffix` is appended to the component key ("marquee_<suffix>").
+    An empty `st.write` follows the marquee.
+    """
+    if len(text) > 280:
+        shown = text[:280] + "..."
+    else:
+        shown = text
+    component_key = f"marquee_{key_suffix}"
+    streamlit_marquee(content=shown, key=component_key, **settings)
+    st.write("")
+def get_high_info_terms(text: str, top_n=10) -> list:
+    """Return the `top_n` most frequent words and bigrams found in `text`.
+
+    The text is lower-cased and tokenized (hyphenated words stay whole).
+    Single words that are stopwords are dropped; a bigram is only dropped if
+    the whole bigram string is in the stopword set, so bigrams containing a
+    stopword are kept.
+    """
+    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
+    tokens = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
+    candidates = list(tokens)
+    candidates.extend(' '.join(pair) for pair in zip(tokens, tokens[1:]))
+    tally = Counter()
+    for term in candidates:
+        if term in stop_words or len(term.split()) > 2:
+            continue
+        tally[term] += 1
+    return [term for term, _count in tally.most_common(top_n)]
 def clean_text_for_filename(text: str) -> str:
+    """Turn free text into an underscore-joined, filename-safe slug.
+
+    Lower-cases the text, strips every character that is not a word
+    character, whitespace or hyphen, drops words of three characters or
+    fewer and a few filler words, and caps the result at 200 characters.
+    """
+    filler = {'the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'}
+    stripped = re.sub(r'[^\w\s-]', '', text.lower())
+    kept = []
+    for word in stripped.split():
+        if len(word) <= 3 or word in filler:
+            continue
+        kept.append(word)
+    return '_'.join(kept)[:200]
+def generate_filename(prompt, response, file_type="md", max_length=200):
+    """
+    Generate a shortened filename by:
+      1) extracting high-info terms,
+      2) snippet from prompt+response,
+      3) remove duplicates,
+      4) truncate if needed.
+    """
     prefix = format_timestamp_prefix() + "_"
+    combined_text = (prompt + " " + response)[:200]
+    info_terms = get_high_info_terms(combined_text, top_n=5)
     snippet = (prompt[:40] + " " + response[:40]).strip()
     snippet_cleaned = clean_text_for_filename(snippet)
+    # remove duplicates
     name_parts = info_terms + [snippet_cleaned]
     seen = set()
     unique_parts = []
             seen.add(part)
             unique_parts.append(part)
     full_name = '_'.join(unique_parts).strip('_')
     leftover_chars = max_length - len(prefix) - len(file_type) - 1
     if len(full_name) > leftover_chars:
     return f"{prefix}{full_name}.{file_type}"
+def create_file(prompt, response, file_type="md"):
+    """Write `prompt`, a blank line, and `response` to a new UTF-8 text file.
+
+    The file name comes from `generate_filename` applied to the stripped
+    prompt and response; the name is returned.
+    """
+    target = generate_filename(prompt.strip(), response.strip(), file_type)
+    with open(target, 'w', encoding='utf-8') as handle:
+        handle.write("\n\n".join((prompt, response)))
+    return target
+def get_download_link(file, file_type="zip"):
+    """
+    Convert a file to base64 and return an HTML link for download.
+
+    Args:
+        file: Path of the file to embed.
+        file_type: One of "zip", "mp3", "wav", "md"; selects the MIME type and
+            the emoji shown in the link. Any other value produces a generic
+            ``application/octet-stream`` link without an emoji.
+
+    Returns:
+        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding the
+        file content. The file's base name is HTML-escaped because names can
+        come from user uploads and the link is rendered as raw HTML.
+    """
+    import html  # local import: only this helper needs it
+
+    # file_type -> (MIME type, prefix shown before the word "Download")
+    link_styles = {
+        "zip": ("application/zip", "📂 "),
+        "mp3": ("audio/mpeg", "🎵 "),
+        "wav": ("audio/wav", "🔊 "),
+        "md": ("text/markdown", "📝 "),
+    }
+    mime, icon = link_styles.get(file_type, ("application/octet-stream", ""))
+    with open(file, "rb") as f:
+        b64 = base64.b64encode(f.read()).decode()
+    safe_name = html.escape(os.path.basename(file), quote=True)
+    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">{icon}Download {safe_name}</a>'
+def clean_for_speech(text: str) -> str:
+    """Prepare text for TTS.
+
+    Newlines and "</s>" markers become spaces, "#" characters are removed,
+    parenthesized http(s) URLs are dropped, and whitespace runs collapse to
+    single spaces with the ends trimmed.
+    """
+    for token, replacement in (("\n", " "), ("</s>", " "), ("#", "")):
+        text = text.replace(token, replacement)
+    without_links = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
+    return re.sub(r"\s+", " ", without_links).strip()
+async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
+    """Synthesize `text` with edge-tts and save it to a generated file name.
+
+    The text is first passed through `clean_for_speech`; if nothing is left,
+    None is returned and no audio is produced. `rate` and `pitch` are integers
+    formatted as signed percent and signed Hz strings. Returns the output
+    file name.
+    """
+    spoken = clean_for_speech(text)
+    if not spoken.strip():
+        return None
+    tts = edge_tts.Communicate(
+        spoken,
+        voice,
+        rate=f"{rate:+d}%",
+        pitch=f"{pitch:+d}Hz",
+    )
+    target = generate_filename(spoken, spoken, file_type=file_format)
+    await tts.save(target)
+    return target
+def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
+    """Synchronously run `edge_tts_generate_audio` and return its result."""
+    job = edge_tts_generate_audio(text, voice, rate, pitch, file_format)
+    return asyncio.run(job)
+def play_and_download_audio(file_path, file_type="mp3"):
+    """Show an audio player and a download link for `file_path`.
+
+    Nothing is rendered when the path is empty or the file does not exist.
+    """
+    if not file_path or not os.path.exists(file_path):
+        return
+    st.audio(file_path)
+    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
+def save_qa_with_audio(question, answer, voice=None):
+    """Save Q&A to markdown and also generate audio.
+
+    Args:
+        question: The prompt text.
+        answer: The response text.
+        voice: TTS voice name; when falsy, the voice stored in
+            ``st.session_state['tts_voice']`` is used.
+
+    Returns:
+        ``(md_file, audio_file)``: the markdown file name returned by
+        ``create_file`` and the audio file name returned by
+        ``speak_with_edge_tts`` (which may be None).
+    """
+    if not voice:
+        voice = st.session_state['tts_voice']
+    # The markdown content is assembled by create_file() from the raw
+    # question and answer; no separate combined string is needed here.
+    md_file = create_file(question, answer, "md")
+    audio_text = f"{question}\n\nAnswer: {answer}"
+    audio_file = speak_with_edge_tts(
+        audio_text,
+        voice=voice,
+        file_format=st.session_state['audio_format']
+    )
+    return md_file, audio_file
 # ─────────────────────────────────────────────────────────
+# 3. PAPER PARSING & DISPLAY
 # ─────────────────────────────────────────────────────────
+def parse_arxiv_refs(ref_text: str):
+    """
+    Parse multi-line reference markdown into at most 20 paper dicts.
+
+    A line containing exactly two '|' characters starts a new paper
+    ("date | title | ..."). After a header, the first non-empty stripped line
+    becomes the authors and every later line is appended to the summary.
+    Each dict has the keys: date, title, url, authors, summary, full_audio,
+    download_base64. When no arxiv.org URL is found on the header line, the
+    url is the placeholder "paper_<index>".
+    """
+    if not ref_text:
+        return []
+    papers = []
+    paper = {}
+    for raw in ref_text.split('\n'):
+        if raw.count('|') != 2:
+            # Body line: only meaningful once a header has been seen
+            if paper:
+                if not paper['authors']:
+                    paper['authors'] = raw.strip('* ')
+                elif paper['summary']:
+                    paper['summary'] += ' ' + raw.strip()
+                else:
+                    paper['summary'] = raw.strip()
+            continue
+        # Header line: flush the paper collected so far
+        if paper:
+            papers.append(paper)
+            if len(papers) >= 20:
+                break
+        try:
+            fields = raw.strip('* ').split('|')
+            when = fields[0].strip()
+            heading = fields[1].strip()
+            found = re.search(r'(https://arxiv.org/\S+)', raw)
+            link = found.group(1) if found else f"paper_{len(papers)}"
+            paper = {
+                'date': when,
+                'title': heading,
+                'url': link,
+                'authors': '',
+                'summary': '',
+                'full_audio': None,
+                'download_base64': '',
+            }
+        except Exception as e:
+            st.warning(f"Error parsing paper header: {str(e)}")
+            paper = {}
+    if paper:
+        papers.append(paper)
+    return papers[:20]
+def create_paper_links_md(papers):
+    """Build markdown with a "# Paper Links" heading and one numbered link per paper."""
+    heading = ["# Paper Links\n"]
+    entries = [
+        f"{number}. **{paper['title']}** — [Arxiv]({paper['url']})"
+        for number, paper in enumerate(papers, start=1)
+    ]
+    return "\n".join(heading + entries)
+def create_paper_audio_files(papers, input_question):
+    """
+    Generate a TTS file per paper and attach it to the paper dict in place.
+
+    For each paper the spoken text is "<title> by <authors>. <summary>".
+    The audio path goes into `paper['full_audio']` and, when a file was
+    produced, an HTML download link with the base64 audio goes into
+    `paper['download_base64']`. On any error a warning is shown and both
+    fields are reset. `input_question` is not read by this function.
+    """
+    for entry in papers:
         try:
+            narration = clean_for_speech(
+                f"{entry['title']} by {entry['authors']}. {entry['summary']}"
+            )
+            fmt = st.session_state['audio_format']
+            saved_path = speak_with_edge_tts(
+                narration,
+                voice=st.session_state['tts_voice'],
+                file_format=fmt
+            )
+            entry['full_audio'] = saved_path
+            if not saved_path:
+                continue
+            with open(saved_path, "rb") as audio_in:
+                encoded = base64.b64encode(audio_in.read()).decode()
+            link_name = os.path.basename(saved_path)
+            if fmt == "mp3":
+                subtype = "mpeg"
+            else:
+                subtype = "wav"
+            entry['download_base64'] = (
+                f'<a href="data:audio/{subtype};base64,{encoded}" '
+                f'download="{link_name}">🎵 Download {link_name}</a>'
+            )
         except Exception as e:
+            st.warning(f"Error processing paper {entry['title']}: {str(e)}")
+            entry['full_audio'] = None
+            entry['download_base64'] = ''
+def display_papers(papers, marquee_settings):
+    """Render each paper in the main area: a marquee line, then an expanded
+    expander with date, title, link, authors, summary and, if present, audio
+    plus its download link."""
+    st.write("## Research Papers")
+    for number, paper in enumerate(papers, start=1):
+        title = paper['title']
+        authors = paper['authors']
+        summary = paper['summary']
+        banner = f"📄 {title} | 👤 {authors[:120]} | 📝 {summary[:200]}"
+        display_marquee(banner, marquee_settings, key_suffix=f"paper_{number}")
+        with st.expander(f"{number}. 📄 {title}", expanded=True):
+            st.markdown(f"**{paper['date']} | {title}** — [Arxiv Link]({paper['url']})")
+            st.markdown(f"*Authors:* {authors}")
+            st.markdown(summary)
+            if not paper.get('full_audio'):
+                continue
+            st.write("📚 Paper Audio")
+            st.audio(paper['full_audio'])
+            if paper['download_base64']:
+                st.markdown(paper['download_base64'], unsafe_allow_html=True)
+def display_papers_in_sidebar(papers):
+    """List the papers in the sidebar, one expander each, showing the link,
+    audio with download link when available, authors, and the first 300
+    characters of the summary followed by "..."."""
+    st.sidebar.title("🎶 Papers & Audio")
+    number = 0
+    for paper in papers:
+        number += 1
+        with st.sidebar.expander(f"{number}. {paper['title']}"):
+            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
+            audio_path = paper['full_audio']
+            if audio_path:
+                st.audio(audio_path)
+                link_html = paper['download_base64']
+                if link_html:
+                    st.markdown(link_html, unsafe_allow_html=True)
+            st.markdown(f"**Authors:** {paper['authors']}")
+            summary = paper['summary']
+            if summary:
+                st.markdown(f"**Summary:** {summary[:300]}...")
 # ─────────────────────────────────────────────────────────
+# 4. ZIP & DELETE-ALL UTILS
 # ─────────────────────────────────────────────────────────
+def create_zip_of_all_files():
+    """
+    Zip every recognized file in the working directory and return the zip name.
+
+    The name is the timestamp prefix plus frequent terms taken from the
+    markdown contents, other file names and the last query, cut to 20
+    characters before ".zip" is appended. Returns None when no file matches.
+    """
+    # .md, .pdf, .mp4, .mp3, .wav, .csv, .xlsx, .html, .py, .txt
+    patterns = (
+        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
+        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt",
+    )
+    found = []
+    for pattern in patterns:
+        found += glob.glob(pattern)
+    found = list(set(found))  # drop duplicates
+    if not found:
+        return None
+    # Text used only to derive the archive name
+    naming_text = []
+    for path in found:
+        if not path.endswith(".md"):
+            naming_text.append(os.path.basename(path))
+            continue
+        with open(path, "r", encoding="utf-8") as md_in:
+            naming_text.append(md_in.read())
+    last_query = st.session_state['last_query']
+    if last_query:
+        naming_text.append(last_query)
+    top_terms = get_high_info_terms(" ".join(naming_text), top_n=10)
+    stamp = format_timestamp_prefix()
+    joined_terms = '-'.join(top_terms[:5])
+    archive_name = f"{stamp}_{joined_terms}"[:20] + ".zip"
+    with zipfile.ZipFile(archive_name, 'w') as archive:
         for f in found:
+            archive.write(f)
+    return archive_name
+def delete_all_files(protected=None):
+    """Removes all recognized file types from the directory.
+
+    Args:
+        protected: Optional iterable of paths that must never be deleted.
+            When None, the currently running script (``__file__``) is
+            protected, because the "*.py" pattern would otherwise delete the
+            application itself.
+
+    Only regular files are removed; a directory whose name happens to match
+    a pattern is skipped instead of making ``os.remove`` raise.
+    """
+    # NOTE(review): "*.txt" also matches files such as requirements.txt;
+    # pass them in `protected` if they must survive a "Delete All".
+    file_patterns = [
+        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
+        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt"
+    ]
+    if protected is None:
+        running_script = globals().get("__file__")
+        protected = [running_script] if running_script else []
+    keep = {os.path.abspath(p) for p in protected}
+    for pat in file_patterns:
+        for f in glob.glob(pat):
+            if os.path.abspath(f) in keep or not os.path.isfile(f):
+                continue
+            os.remove(f)
 # ─────────────────────────────────────────────────────────
+# 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
+# ─────────────────────────────────────────────────────────
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
+                     titles_summary=True, full_audio=False):
+    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline.
+
+    Steps:
+      1) Send `q` to Claude, show the reply, and save it as markdown + audio.
+      2) Query the ArXiv RAG Space for references and an LLM answer, then
+         save the combined result as markdown + audio.
+      3) Parse the references into papers, write a links page, generate
+         per-paper audio and render the papers in the page and the sidebar.
+
+    Args:
+        q: The query text.
+        vocal_summary, extended_refs, titles_summary, full_audio: Accepted so
+            existing call sites keep working; this function does not read them.
+
+    Returns:
+        The markdown string combining the query, the RAG answer and the
+        reference list.
+    """
+    start = time.time()
+    # --- 1) Claude API
+    claude = anthropic.Anthropic(api_key=anthropic_key)
+    response = claude.messages.create(
+        model="claude-3-sonnet-20240229",
+        max_tokens=1000,
+        messages=[
+            {"role": "user", "content": q}
+        ])
+    st.write("Claude's reply 🧠:")
+    st.markdown(response.content[0].text)
+    # Save & produce audio. save_qa_with_audio() already writes the markdown
+    # file through create_file(), so a separate create_file() call here would
+    # only write the same file a second time.
+    result = response.content[0].text
+    _, audio_file = save_qa_with_audio(q, result)
+    st.subheader("📝 Main Response Audio")
+    play_and_download_audio(audio_file, st.session_state['audio_format'])
+    # --- 2) Arxiv RAG
+    st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
+    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+    # Only the first element of the search result is used as the reference text
+    refs = rag_client.predict(
+        q,
+        20,
+        "Semantic Search",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        api_name="/update_with_rag_md"
+    )[0]
+    r2 = rag_client.predict(
+        q,
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        True,
+        api_name="/ask_llm"
+    )
+    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
+    _, audio_file = save_qa_with_audio(q, result)
+    st.subheader("📝 Main Response Audio")
+    play_and_download_audio(audio_file, st.session_state['audio_format'])
+    # --- 3) Parse + handle papers
+    papers = parse_arxiv_refs(refs)
+    if papers:
+        # Create minimal links page first
+        paper_links = create_paper_links_md(papers)
+        create_file(q, paper_links, "md")
+        st.markdown(paper_links)
+        # Then create audio for each paper
+        create_paper_audio_files(papers, input_question=q)
+        display_papers(papers, get_marquee_settings())
+        display_papers_in_sidebar(papers)
+    else:
+        st.warning("No papers found in the response.")
+    elapsed = time.time() - start
+    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+    return result
+def process_voice_input(text):
+    """Run the AI lookup for a voice/text query, then save the final result
+    as markdown + audio and show the generated file names and a player.
+
+    Does nothing when `text` is empty.
+    """
+    if text:
+        st.subheader("🔍 Search Results")
+        lookup_result = perform_ai_lookup(
+            text,
+            vocal_summary=True,
+            extended_refs=False,
+            titles_summary=True,
+            full_audio=True
+        )
+        saved_md, saved_audio = save_qa_with_audio(text, lookup_result)
+        st.subheader("📝 Generated Files")
+        st.write(f"Markdown: {saved_md}")
+        st.write(f"Audio: {saved_audio}")
+        play_and_download_audio(saved_audio, st.session_state['audio_format'])
+# ─────────────────────────────────────────────────────────
+# 6. FILE HISTORY SIDEBAR
+# ─────────────────────────────────────────────────────────
+def display_file_history_in_sidebar():
+    """
+    List every recognized file in the sidebar, newest modification first.
+
+    Each file gets an expander titled with its emoji and name, showing the
+    modification time plus: a 200-character preview and download link for
+    markdown, a player and download link for mp3/wav, or a plain download
+    link for anything else.
+    """
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("### 📂 File History")
+    # .md, .mp3, .wav, .pdf, .mp4, .csv, .xlsx, .html, .py, .txt
+    patterns = (
+        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
+        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt",
+    )
+    found = []
+    for pattern in patterns:
+        found += glob.glob(pattern)
+    if not found:
+        st.sidebar.write("No files found.")
+        return
+    # De-duplicate, then sort newest first
+    newest_first = sorted(set(found), key=os.path.getmtime, reverse=True)
+    for path in newest_first:
+        fname = os.path.basename(path)
+        ext = os.path.splitext(fname)[1].lower().strip('.')
+        icon = FILE_EMOJIS.get(ext, '📦')
+        modified = datetime.fromtimestamp(os.path.getmtime(path))
+        with st.sidebar.expander(f"{icon} {fname}"):
+            st.write(f"**Modified:** {modified.strftime('%Y-%m-%d %H:%M:%S')}")
+            if ext == "md":
+                with open(path, "r", encoding="utf-8") as md_in:
+                    preview = md_in.read(200).replace("\n", " ")
+                if len(preview) == 200:
+                    preview += "..."
+                st.write(preview)
+                link = get_download_link(path, file_type="md")
+            elif ext in ["mp3","wav"]:
+                st.audio(path)
+                link = get_download_link(path, file_type=ext)
+            else:
+                link = get_download_link(path)
+            st.markdown(link, unsafe_allow_html=True)
+# ─────────────────────────────────────────────────────────
+# 7. MAIN APP
 # ─────────────────────────────────────────────────────────
 def main():
+    """
+    Main Streamlit app.
+
+    Renders, in order:
+      1) Voice, AutoRun and audio format at the top of the sidebar,
+      2) File Tools (Delete All / Zip All) in the sidebar,
+      3) Marquee settings and the file history in the sidebar,
+      4) The welcome marquee,
+      5) One of the tabs Upload / Voice / Media / ArXiv / Editor, chosen
+         with a horizontal radio control.
+
+    A rerun is requested at the end when `st.session_state.should_rerun`
+    was set during this run.
+    """
+    # -- 1) Voice & AutoRun at top of sidebar --
+    st.sidebar.title("Global Settings")
+    selected_voice = st.sidebar.selectbox(
+        "TTS Voice",
+        options=EDGE_TTS_VOICES,
+        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
+    )
+    # Autorun defaults to off (False)
+    st.session_state.autorun = st.sidebar.checkbox("AutoRun", value=st.session_state.autorun)
+    # Audio format
+    audio_format = st.sidebar.radio("Audio Format", ["MP3","WAV"], index=0)
+    # st.rerun() replaces st.experimental_rerun(), which newer Streamlit
+    # releases no longer provide.
+    if selected_voice != st.session_state['tts_voice']:
+        st.session_state['tts_voice'] = selected_voice
+        st.rerun()
+    if audio_format.lower() != st.session_state['audio_format']:
+        st.session_state['audio_format'] = audio_format.lower()
+        st.rerun()
+    # -- 2) File Tools: Delete All / Zip All
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("### 🗃 File Tools")
+    col_del, col_zip = st.sidebar.columns(2)
+    with col_del:
+        if st.button("🗑 Delete All"):
+            delete_all_files()
+            st.sidebar.success("All recognized files removed!")
+            st.rerun()
+    with col_zip:
+        if st.button("📦 Zip All"):
+            zip_name = create_zip_of_all_files()
+            if zip_name:
+                st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
+    # -- 3) Marquee Settings
+    update_marquee_settings_ui()
+    marquee_settings = get_marquee_settings()
+    # -- 4) File History in sidebar
+    display_file_history_in_sidebar()
+    # -- 5) Display marquee
+    display_marquee(st.session_state['marquee_content'],
+                    {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
+                    key_suffix="welcome")
+    # -- 6) Main action tabs
+    tab_main = st.radio(
+        "Action:",
+        ["📤 Upload", "🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
+        horizontal=True
+    )
+    # 6a) Upload Tab
+    if tab_main == "📤 Upload":
+        st.header("📤 Upload Files")
+        # type=None accepts every file type
+        uploaded = st.file_uploader("Select files to upload:",
+                                    accept_multiple_files=True,
+                                    type=None)
+        if uploaded:
+            # The uploader returns the same files on every script run, so
+            # remember what was already written. Without this, each run
+            # would rewrite the files and request yet another rerun forever.
+            if 'saved_uploads' not in st.session_state:
+                st.session_state['saved_uploads'] = set()
+            already_saved = st.session_state['saved_uploads']
+            new_files = [uf for uf in uploaded if (uf.name, uf.size) not in already_saved]
+            for uf in new_files:
+                # basename() keeps a crafted name inside the working directory
+                with open(os.path.basename(uf.name), "wb") as outfile:
+                    outfile.write(uf.read())
+                already_saved.add((uf.name, uf.size))
+            st.success("Uploaded!")
+            if new_files:
+                st.session_state.should_rerun = True
+    # 6b) Voice Tab
+    elif tab_main == "🎤 Voice":
+        st.subheader("🎤 Voice Input")
+        user_text = st.text_area("💬 Message:", height=100)
+        user_text = user_text.strip().replace('\n', ' ')
+        if st.button("📨 Send"):
+            process_voice_input(user_text)
+        st.subheader("📜 Chat History")
+        for c in st.session_state.chat_history:
+            st.write("**You:**", c["user"])
+            st.write("**Response:**", c["claude"])
+    # 6c) Media Tab
+    elif tab_main == "📸 Media":
+        st.header("📸 Media Gallery")
+        tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
+        with tabs[0]:
+            st.subheader("🎵 Audio Files")
+            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
+            if audio_files:
+                for a in audio_files:
+                    with st.expander(os.path.basename(a)):
+                        st.audio(a)
+                        ext = os.path.splitext(a)[1].replace('.', '')
+                        dl_link = get_download_link(a, file_type=ext)
+                        st.markdown(dl_link, unsafe_allow_html=True)
+            else:
+                st.write("No audio files found.")
+        with tabs[1]:
+            st.subheader("🖼 Image Files")
+            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
+            if imgs:
+                c = st.slider("Cols", 1, 5, 3, key="cols_images")
+                cols = st.columns(c)
+                for i, f in enumerate(imgs):
+                    with cols[i % c]:
+                        st.image(Image.open(f), use_container_width=True)
+            else:
+                st.write("No images found.")
+        with tabs[2]:
+            st.subheader("🎥 Video Files")
+            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
+            if vids:
+                for v in vids:
+                    with st.expander(os.path.basename(v)):
+                        st.video(v)
             else:
+                st.write("No videos found.")
+    # 6d) ArXiv Tab
+    elif tab_main == "🔍 ArXiv":
+        st.subheader("🔍 Query ArXiv")
+        q = st.text_input("🔍 Query:", key="arxiv_query")
+        st.markdown("### 🎛 Options")
+        st.write("(AutoRun is in the sidebar.)")
+        extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
+        titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
+        full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
+        full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")
+        # The Run button is only rendered once a query has been typed
+        run_clicked = bool(q) and st.button("🔍Run")
+        # Run the lookup at most once per script run, whether it was
+        # triggered by the button, by AutoRun, or by both at once.
+        if q and (run_clicked or st.session_state.autorun):
+            st.session_state.last_query = q
+            result = perform_ai_lookup(q,
+                                       extended_refs=extended_refs,
+                                       titles_summary=titles_summary,
+                                       full_audio=full_audio)
+            if full_transcript:
+                create_file(q, result, "md")
+    # 6e) Editor Tab
+    elif tab_main == "📝 Editor":
+        st.write("Select or create a file to edit. (Currently minimal demo)")
+    # Rerun if needed
+    if st.session_state.should_rerun:
+        st.session_state.should_rerun = False
+        st.rerun()
 if __name__ == "__main__":
+    main()