awacke1 committed (verified)
Commit eeb7d45 · 1 Parent(s): 3003f19

Create app.py

Files changed (1):
  1. app.py +842 -0

app.py ADDED
@@ -0,0 +1,842 @@
import streamlit as st
import asyncio
import websockets
import uuid
from datetime import datetime
import os
import random
import time
import hashlib
from PIL import Image
import glob
import base64
import io
import streamlit.components.v1 as components
import edge_tts
from audio_recorder_streamlit import audio_recorder
import nest_asyncio
import re
from streamlit_paste_button import paste_image_button
import pytz
import shutil
import anthropic
import openai
from PyPDF2 import PdfReader
import threading
import json
import zipfile
from gradio_client import Client
from dotenv import load_dotenv
from streamlit_marquee import streamlit_marquee
from collections import defaultdict, Counter
import pandas as pd
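None of these third-party dependencies are pinned in this commit. A minimal sketch for sanity-checking that they are installed before launching; the module-to-package mapping in the comments is an assumption inferred from the import lines above, not something declared in the repo:

```python
# Illustrative dependency check (package names on PyPI are assumptions).
import importlib.util

REQUIRED_MODULES = [
    "streamlit", "websockets", "PIL",            # pillow provides PIL
    "edge_tts",                                   # edge-tts
    "audio_recorder_streamlit",                   # audio-recorder-streamlit
    "nest_asyncio", "streamlit_paste_button",
    "pytz", "anthropic", "openai", "PyPDF2",
    "gradio_client", "dotenv",                    # python-dotenv provides dotenv
    "streamlit_marquee", "pandas",
]

missing = [m for m in REQUIRED_MODULES if importlib.util.find_spec(m) is None]
print("Missing modules:", missing or "none")
```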

# 🛠️ Patch asyncio for nesting
nest_asyncio.apply()

# 🎨 Page Config
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto"
)

# 🌟 Static Config
icons = '🤖🧠🔬📝'
Site_Name = '🤖🧠Chat & Quote Node📝🔬'
START_ROOM = "Sector 🌌"
FUN_USERNAMES = {
    "CosmicJester 🌌": "en-US-AriaNeural",
    "PixelPanda 🐼": "en-US-JennyNeural",
    "QuantumQuack 🦆": "en-GB-SoniaNeural",
    "StellarSquirrel 🐿️": "en-AU-NatashaNeural",
    "GizmoGuru ⚙️": "en-CA-ClaraNeural",
    "NebulaNinja 🌠": "en-US-GuyNeural",
    "ByteBuster 💾": "en-GB-RyanNeural",
    "GalacticGopher 🌍": "en-AU-WilliamNeural",
    "RocketRaccoon 🚀": "en-CA-LiamNeural",
    "EchoElf 🧝": "en-US-AnaNeural",
    "PhantomFox 🦊": "en-US-BrandonNeural",
    "WittyWizard 🧙": "en-GB-ThomasNeural",
    "LunarLlama 🌙": "en-AU-FreyaNeural",
    "SolarSloth ☀️": "en-CA-LindaNeural",
    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
    "CyberCoyote 🐺": "en-GB-ElliotNeural",
    "MysticMoose 🦌": "en-AU-JamesNeural",
    "GlitchGnome 🧚": "en-CA-EthanNeural",
    "VortexViper 🐍": "en-US-AmberNeural",
    "ChronoChimp 🐒": "en-GB-LibbyNeural"
}
EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "png": "🖼️", "mp4": "🎥"}

# 📁 Directories
for d in ["chat_logs", "vote_logs", "audio_logs", "history_logs", "audio_cache", "paper_metadata"]:
    os.makedirs(d, exist_ok=True)

CHAT_DIR = "chat_logs"
VOTE_DIR = "vote_logs"
MEDIA_DIR = "."
AUDIO_CACHE_DIR = "audio_cache"
AUDIO_DIR = "audio_logs"
PAPER_DIR = "paper_metadata"
STATE_FILE = "user_state.txt"

CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
IMAGE_VOTES_FILE = os.path.join(VOTE_DIR, "image_votes.md")
HISTORY_FILE = os.path.join(VOTE_DIR, "vote_history.md")

# 🔑 API Keys
load_dotenv()
anthropic_key = os.getenv('ANTHROPIC_API_KEY', st.secrets.get('ANTHROPIC_API_KEY', ""))
openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ""))
openai_client = openai.OpenAI(api_key=openai_api_key)

# 🕒 Timestamp Helper
def format_timestamp_prefix(username=""):
    central = pytz.timezone('US/Central')
    now = datetime.now(central)
    return f"{now.strftime('%Y%m%d_%H%M%S')}-by-{username}"

# 📈 Performance Timer
class PerformanceTimer:
    def __init__(self, name):
        self.name, self.start = name, None
    def __enter__(self):
        self.start = time.time()
        return self
    def __exit__(self, *args):
        duration = time.time() - self.start
        st.session_state['operation_timings'][self.name] = duration
        st.session_state['performance_metrics'][self.name].append(duration)
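PerformanceTimer is not invoked anywhere else in this file; a minimal usage sketch, assuming init_session_state() below has already created the 'operation_timings' and 'performance_metrics' entries it writes to:

```python
# Illustrative only: must run inside a Streamlit script after init_session_state().
with PerformanceTimer("tts_generation"):
    time.sleep(0.25)  # stand-in for real work, e.g. an edge_tts call
st.write(st.session_state['operation_timings'])    # e.g. {'tts_generation': ~0.25}
st.write(st.session_state['performance_metrics'])  # e.g. {'tts_generation': [~0.25]}
```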

# 🎛️ Session State Init
def init_session_state():
    defaults = {
        'server_running': False, 'server_task': None, 'active_connections': {},
        'media_notifications': [], 'last_chat_update': 0, 'displayed_chat_lines': [],
        'message_text': "", 'audio_cache': {}, 'pasted_image_data': None,
        'quote_line': None, 'refresh_rate': 5, 'base64_cache': {},
        'transcript_history': [], 'last_transcript': "", 'image_hashes': set(),
        'tts_voice': "en-US-AriaNeural", 'chat_history': [], 'marquee_settings': {
            "background": "#1E1E1E", "color": "#FFFFFF", "font-size": "14px",
            "animationDuration": "20s", "width": "100%", "lineHeight": "35px"
        }, 'operation_timings': {}, 'performance_metrics': defaultdict(list),
        'enable_audio': True, 'download_link_cache': {}, 'username': None,
        'autosend': True, 'autosearch': True, 'last_message': "", 'last_query': "",
        'mp3_files': {}, 'timer_start': time.time(), 'quote_index': 0,
        'quote_source': "famous", 'last_sent_transcript': "", 'old_val': None,
        'last_refresh': time.time(), 'paper_metadata': {}, 'paste_trigger': False
    }
    for k, v in defaults.items():
        if k not in st.session_state:
            st.session_state[k] = v

# 🖌️ Marquee Helpers
def update_marquee_settings_ui():
    st.sidebar.markdown("### 🎯 Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        st.session_state['marquee_settings']['background'] = st.color_picker("🎨 Background", "#1E1E1E")
        st.session_state['marquee_settings']['color'] = st.color_picker("✍️ Text", "#FFFFFF")
    with cols[1]:
        st.session_state['marquee_settings']['font-size'] = f"{st.slider('📏 Size', 10, 24, 14)}px"
        st.session_state['marquee_settings']['animationDuration'] = f"{st.slider('⏱️ Speed', 1, 20, 20)}s"

def display_marquee(text, settings, key_suffix=""):
    truncated = text[:280] + "..." if len(text) > 280 else text
    streamlit_marquee(content=truncated, **settings, key=f"marquee_{key_suffix}")
    st.write("")

# 📝 Text & File Helpers
def clean_text_for_tts(text):
    return re.sub(r'[#*!\[\]]+', '', ' '.join(text.split()))[:200] or "No text"

def clean_text_for_filename(text):
    return '_'.join(re.sub(r'[^\w\s-]', '', text.lower()).split())[:200]

def get_high_info_terms(text, top_n=10):
    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
    filtered = [t for t in words + bi_grams if t not in stop_words and len(t.split()) <= 2]
    return [t for t, _ in Counter(filtered).most_common(top_n)]

def generate_filename(prompt, username, file_type="md", title=None):
    timestamp = format_timestamp_prefix(username)
    if title:
        high_info = '-'.join(get_high_info_terms(title, 5))
        return f"{timestamp}-{clean_text_for_filename(prompt[:20])}-{high_info}.{file_type}"
    hash_val = hashlib.md5(prompt.encode()).hexdigest()[:8]
    return f"{timestamp}-{hash_val}.{file_type}"

def create_file(prompt, username, file_type="md", title=None):
    filename = generate_filename(prompt, username, file_type, title)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt)
    return filename

def get_download_link(file, file_type="mp3"):
    cache_key = f"dl_{file}"
    if cache_key not in st.session_state['download_link_cache']:
        with open(file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        mime_types = {"mp3": "audio/mpeg", "png": "image/png", "mp4": "video/mp4", "md": "text/markdown", "zip": "application/zip"}
        st.session_state['download_link_cache'][cache_key] = f'<a href="data:{mime_types.get(file_type, "application/octet-stream")};base64,{b64}" download="{os.path.basename(file)}">{FILE_EMOJIS.get(file_type, "Download")} Download {os.path.basename(file)}</a>'
    return st.session_state['download_link_cache'][cache_key]

def save_username(username):
    try:
        with open(STATE_FILE, 'w') as f:
            f.write(username)
    except Exception as e:
        print(f"Failed to save username: {e}")

def load_username():
    if os.path.exists(STATE_FILE):
        try:
            with open(STATE_FILE, 'r') as f:
                return f.read().strip()
        except Exception as e:
            print(f"Failed to load username: {e}")
    return None

def concatenate_markdown_files():
    md_files = sorted(glob.glob("*.md"), key=os.path.getmtime, reverse=True)
    all_md_content = ""
    for md_file in md_files:
        with open(md_file, 'r', encoding='utf-8') as f:
            all_md_content += f.read() + "\n\n---\n\n"
    return all_md_content.strip()
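An illustrative sketch of what the naming helpers above return; the timestamps and hashes will differ on every run, so the outputs in the comments are examples only:

```python
# Illustrative: outputs depend on the current time and the MD5 of the prompt.
print(get_high_info_terms("retrieval augmented generation for arxiv paper search", top_n=3))
# e.g. ['retrieval', 'augmented', 'generation']
print(generate_filename("What is mixture of experts?", "CosmicJester 🌌"))
# e.g. '20250101_120000-by-CosmicJester 🌌-1a2b3c4d.md'   (timestamp-username prefix + md5 stub)
print(generate_filename("What is mixture of experts?", "CosmicJester 🌌", title="Mixture of Experts"))
# e.g. '20250101_120000-by-CosmicJester 🌌-what_is_mixture_of_e-mixture-experts-....md'
```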

# 🎶 Audio Processing
async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
    cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
    if cache_key in st.session_state['audio_cache']:
        return st.session_state['audio_cache'][cache_key], 0
    start_time = time.time()
    text = clean_text_for_tts(text)
    if not text or text == "No text":
        print(f"Skipping audio generation for empty/invalid text: '{text}'")
        return None, 0
    filename = f"{format_timestamp_prefix(username)}-{hashlib.md5(text.encode()).hexdigest()[:8]}.{file_format}"
    try:
        communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
        await communicate.save(filename)
        st.session_state['audio_cache'][cache_key] = filename
        return filename, time.time() - start_time
    except edge_tts.exceptions.NoAudioReceived as e:
        print(f"No audio received for text: '{text}' with voice: {voice}. Error: {e}")
        return None, 0
    except Exception as e:
        print(f"Error generating audio for text: '{text}' with voice: {voice}. Error: {e}")
        return None, 0

def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        st.markdown(get_download_link(file_path), unsafe_allow_html=True)

def load_mp3_viewer():
    mp3_files = sorted(glob.glob("*.mp3"), key=os.path.getmtime, reverse=True)
    for mp3 in mp3_files:
        filename = os.path.basename(mp3)
        if filename not in st.session_state['mp3_files']:
            st.session_state['mp3_files'][filename] = mp3
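The text-to-speech path can be exercised outside Streamlit with the same edge_tts calls used in async_edge_tts_generate; a minimal standalone sketch, where the voice name and output path are arbitrary choices:

```python
import asyncio
import edge_tts

async def demo():
    # Same API the app uses, minus the session-state cache and filename scheme.
    communicate = edge_tts.Communicate("Hello from the talking researcher.", "en-US-AriaNeural")
    await communicate.save("demo.mp3")

asyncio.run(demo())
```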

async def save_chat_entry(username, message, voice, is_markdown=False):
    if not message.strip() or message == st.session_state.last_transcript:
        return None, None
    central = pytz.timezone('US/Central')
    timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{timestamp}] {username} ({voice}): {message}" if not is_markdown else f"[{timestamp}] {username} ({voice}):\n```markdown\n{message}\n```"
    md_file = create_file(entry, username, "md")
    with open(CHAT_FILE, 'a') as f:
        f.write(f"{entry}\n")
    audio_file, _ = await async_edge_tts_generate(message, voice, username)
    if audio_file:
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{timestamp}] {username}: Audio - {audio_file}\n")
        st.session_state['mp3_files'][os.path.basename(audio_file)] = audio_file
    await broadcast_message(f"{username}|{message}", "chat")
    st.session_state.last_chat_update = time.time()
    st.session_state.chat_history.append(entry)
    st.session_state.last_transcript = message
    return md_file, audio_file

async def load_chat():
    if not os.path.exists(CHAT_FILE):
        with open(CHAT_FILE, 'a') as f:
            f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n")
    with open(CHAT_FILE, 'r') as f:
        content = f.read().strip()
    lines = content.split('\n')
    unique_lines = list(dict.fromkeys(line for line in lines if line.strip()))
    return unique_lines
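For reference, a sketch of the entry format save_chat_entry appends and load_chat re-reads; the timestamp and names are placeholders:

```python
# One plain chat line as written to CHAT_FILE:
entry = "[2025-01-01 12:00:00] CosmicJester 🌌 (en-US-AriaNeural): hello world"
# With is_markdown=True, the message is instead wrapped in a fenced markdown block.
# load_chat() then de-duplicates repeated lines while preserving order:
print(list(dict.fromkeys(["a", "b", "a", "c"])))  # ['a', 'b', 'c']
```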

# Claude Search Function with Image Support
async def perform_claude_search(query, username, image=None):
    if not query.strip() or query == st.session_state.last_transcript:
        return None, None, None
    client = anthropic.Anthropic(api_key=anthropic_key)
    message_content = [{"type": "text", "text": query}]
    if image:
        # Convert PIL Image to base64
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        message_content.append({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": img_base64
            }
        })
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": message_content}]
    )
    result = response.content[0].text
    st.markdown(f"### Claude's Reply 🧠\n{result}")

    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    md_file, audio_file = await save_chat_entry(username, f"Claude Search: {query}\nResponse: {result}", voice, True)
    return md_file, audio_file, result

# ArXiv Search Function
async def perform_arxiv_search(query, username, claude_result=None):
    if not query.strip() or query == st.session_state.last_transcript:
        return None, None
    if claude_result is None:
        client = anthropic.Anthropic(api_key=anthropic_key)
        claude_response = client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": query}]
        )
        claude_result = claude_response.content[0].text
        st.markdown(f"### Claude's Reply 🧠\n{claude_result}")

    enhanced_query = f"{query}\n\n{claude_result}"
    gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = gradio_client.predict(
        enhanced_query, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md"
    )[0]
    result = f"🔎 {enhanced_query}\n\n{refs}"
    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    md_file, audio_file = await save_chat_entry(username, f"ArXiv Search: {query}\nClaude Response: {claude_result}\nArXiv Results: {refs}", voice, True)
    return md_file, audio_file
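The ArXiv RAG call can be tested on its own with gradio_client; a minimal sketch mirroring the predict() signature used above (it assumes the Hugging Face Space is reachable and still exposes /update_with_rag_md):

```python
from gradio_client import Client

client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
refs = client.predict(
    "state space models for long context",     # query text
    10,                                         # number of results
    "Semantic Search",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_name="/update_with_rag_md"
)[0]
print(refs[:500])
```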

async def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    start = time.time()
    client = anthropic.Anthropic(api_key=anthropic_key)
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}]
    )
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    result = response.content[0].text
    md_file = create_file(result, "System", "md")
    audio_file, _ = await async_edge_tts_generate(result, st.session_state['tts_voice'], "System")
    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file)

    papers = []
    if useArxiv:
        q = q + result
        st.write('Running Arxiv RAG with Claude inputs.')
        gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = gradio_client.predict(
            q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md"
        )[0]
        papers = parse_arxiv_refs(refs, q)
        for paper in papers:
            filename = create_file(generate_5min_feature_markdown(paper), "System", "md", paper['title'])
            paper['md_file'] = filename
            st.session_state['paper_metadata'][paper['title']] = filename
        if papers and useArxivAudio:
            await create_paper_audio_files(papers, q)
    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result, papers

# 🌐 WebSocket Handling
async def websocket_handler(websocket, path):
    client_id = str(uuid.uuid4())
    room_id = "chat"
    if room_id not in st.session_state.active_connections:
        st.session_state.active_connections[room_id] = {}
    st.session_state.active_connections[room_id][client_id] = websocket
    username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
    chat_content = await load_chat()
    if not any(f"Client-{client_id}" in line for line in chat_content):
        await save_chat_entry("System 🌟", f"{username} has joined {START_ROOM}!", "en-US-AriaNeural")
    try:
        async for message in websocket:
            if '|' in message:
                username, content = message.split('|', 1)
                voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
                await save_chat_entry(username, content, voice)
            else:
                await websocket.send("ERROR|Message format: username|content")
    except websockets.ConnectionClosed:
        await save_chat_entry("System 🌟", f"{username} has left {START_ROOM}!", "en-US-AriaNeural")
    finally:
        if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
            del st.session_state.active_connections[room_id][client_id]

async def broadcast_message(message, room_id):
    if room_id in st.session_state.active_connections:
        disconnected = []
        for client_id, ws in st.session_state.active_connections[room_id].items():
            try:
                await ws.send(message)
            except websockets.ConnectionClosed:
                disconnected.append(client_id)
        for client_id in disconnected:
            if client_id in st.session_state.active_connections[room_id]:
                del st.session_state.active_connections[room_id][client_id]

async def run_websocket_server():
    if not st.session_state.server_running:
        server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
        st.session_state.server_running = True
        await server.wait_closed()

def start_websocket_server():
    asyncio.run(run_websocket_server())
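A minimal sketch of a client for this relay, run from a separate process while the app is up; the username|message framing matches what websocket_handler parses:

```python
import asyncio
import websockets

async def send_test_message():
    # Port 8765 matches the websockets.serve(...) call above.
    async with websockets.connect("ws://localhost:8765") as ws:
        await ws.send("CosmicJester 🌌|Hello from a raw websocket client")

asyncio.run(send_test_message())
```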

# 📚 PDF to Audio
class AudioProcessor:
    def __init__(self):
        self.cache_dir = AUDIO_CACHE_DIR
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = json.load(open(f"{self.cache_dir}/metadata.json")) if os.path.exists(f"{self.cache_dir}/metadata.json") else {}

    def _save_metadata(self):
        with open(f"{self.cache_dir}/metadata.json", 'w') as f:
            json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = f"{self.cache_dir}/{cache_key}.mp3"
        if cache_key in self.metadata and os.path.exists(cache_path):
            return cache_path
        text = clean_text_for_tts(text)
        if not text:
            return None
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)
        self.metadata[cache_key] = {'timestamp': datetime.now().isoformat(), 'text_length': len(text), 'voice': voice}
        self._save_metadata()
        return cache_path

def process_pdf(pdf_file, max_pages, voice, audio_processor):
    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}
    async def process_page(i, text):
        audio_path = await audio_processor.create_audio(text, voice)
        if audio_path:
            audios[i] = audio_path
    for i in range(total_pages):
        text = reader.pages[i].extract_text()
        texts.append(text)
        # Bind i and text now; a bare lambda would capture the loop variables late
        # and every thread could end up processing the final page.
        threading.Thread(target=lambda i=i, text=text: asyncio.run(process_page(i, text))).start()
    return texts, audios, total_pages
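AudioProcessor does not touch Streamlit session state, so its cache can be exercised from a plain script or REPL that imports this module; a minimal sketch with arbitrary text and voice:

```python
import asyncio

async def demo_pdf_audio():
    processor = AudioProcessor()
    path = await processor.create_audio("This sentence becomes cached speech.", "en-US-AriaNeural")
    print("Cached MP3 at:", path)

asyncio.run(demo_pdf_audio())
```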

# 🔍 ArXiv & AI Lookup
def parse_arxiv_refs(ref_text, query):
    if not ref_text:
        return []
    papers = []
    current = {}
    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            if current:
                papers.append(current)
            date, title, *_ = line.strip('* ').split('|')
            url = re.search(r'(https://arxiv.org/\S+)', line).group(1) if re.search(r'(https://arxiv.org/\S+)', line) else f"paper_{len(papers)}"
            current = {'date': date, 'title': title, 'url': url, 'authors': '', 'summary': '', 'full_audio': None, 'download_base64': '', 'query': query}
        elif current:
            if not current['authors']:
                current['authors'] = line.strip('* ')
            else:
                current['summary'] += ' ' + line.strip() if current['summary'] else line.strip()
    if current:
        papers.append(current)
    return papers[:20]

def generate_5min_feature_markdown(paper):
    title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
    pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
    wct, sw = len(title.split()), len(summary.split())
    terms = get_high_info_terms(summary, 15)
    rouge = round((len(terms) / max(sw, 1)) * 100, 2)
    mermaid = "```mermaid\nflowchart TD\n" + "\n".join(f'    T{i+1}["{terms[i]}"] --> T{i+2}["{terms[i+1]}"]' for i in range(len(terms)-1)) + "\n```"
    return f"""
## 📄 {title}
**Authors:** {authors}
**Date:** {date}
**Words:** Title: {wct}, Summary: {sw}
**Links:** [Abstract]({url}) | [PDF]({pdf_url})
**Terms:** {', '.join(terms)}
**ROUGE:** {rouge}%
### 🎤 TTS Read Aloud
- **Title:** {title}
- **Terms:** {', '.join(terms)}
- **ROUGE:** {rouge}%
#### Concepts Graph
{mermaid}
---
"""

async def create_paper_audio_files(papers, query):
    for p in papers:
        audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
        p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], p['authors'])
        if p['full_audio']:
            p['download_base64'] = get_download_link(p['full_audio'])

def save_vote(file, item, user_hash):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{timestamp}] {user_hash} voted for {item}"
    try:
        with open(file, 'a') as f:
            f.write(f"{entry}\n")
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"- {timestamp} - User {user_hash} voted for {item}\n")
        return True
    except Exception as e:
        print(f"Vote save flop: {e}")
        return False

def load_votes(file):
    if not os.path.exists(file):
        with open(file, 'w') as f:
            f.write("# Vote Tally\n\nNo votes yet - get clicking! 🖱️\n")
    try:
        with open(file, 'r') as f:
            lines = f.read().strip().split('\n')
        votes = {}
        for line in lines[2:]:
            if line.strip() and 'voted for' in line:
                item = line.split('voted for ')[1]
                votes[item] = votes.get(item, 0) + 1
        return votes
    except Exception as e:
        print(f"Vote load oopsie: {e}")
        return {}

def generate_user_hash():
    if 'user_hash' not in st.session_state:
        session_id = str(random.getrandbits(128))
        hash_object = hashlib.md5(session_id.encode())
        st.session_state['user_hash'] = hash_object.hexdigest()[:8]
    return st.session_state['user_hash']

async def save_pasted_image(image, username):
    img_hash = hashlib.md5(image.tobytes()).hexdigest()[:8]
    if img_hash in st.session_state.image_hashes:
        return None
    timestamp = format_timestamp_prefix(username)
    filename = f"{timestamp}-{img_hash}.png"
    filepath = filename
    image.save(filepath, "PNG")
    st.session_state.image_hashes.add(img_hash)
    return filepath
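For reference, a sketch of the vote-line format save_vote appends and the tally load_votes builds from it; the timestamp and user hash are placeholders:

```python
# A line appended by save_vote:
#   [2025-01-01 12:00:00] 1a2b3c4d voted for Quote A
# load_votes skips the two header lines and counts occurrences per item:
votes = {}
for line in ["[ts] 1a2b3c4d voted for Quote A", "[ts] 9f8e7d6c voted for Quote A"]:
    item = line.split('voted for ')[1]
    votes[item] = votes.get(item, 0) + 1
print(votes)  # {'Quote A': 2}
```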

# 📦 Zip Files
def create_zip_of_files(md_files, mp3_files, png_files, mp4_files, query):
    all_files = md_files + mp3_files + png_files + mp4_files
    if not all_files:
        return None
    terms = get_high_info_terms(" ".join([open(f, 'r', encoding='utf-8').read() if f.endswith('.md') else os.path.splitext(os.path.basename(f))[0].replace('_', ' ') for f in all_files] + [query]), 5)
    zip_name = f"{format_timestamp_prefix()}_{'-'.join(terms)[:20]}.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        [z.write(f) for f in all_files]
    return zip_name

# 🎮 Main Interface
def main():
    init_session_state()
    load_mp3_viewer()
    saved_username = load_username()
    if saved_username and saved_username in FUN_USERNAMES:
        st.session_state.username = saved_username
    if not st.session_state.username:
        available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in asyncio.run(load_chat()))]
        st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
        st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
        asyncio.run(save_chat_entry("System 🌟", f"{st.session_state.username} has joined {START_ROOM}!", "en-US-AriaNeural"))
    save_username(st.session_state.username)

    st.title(f"{Site_Name} for {st.session_state.username}")
    update_marquee_settings_ui()
    display_marquee(f"🚀 Welcome to {START_ROOM} | 🤖 {st.session_state.username}", st.session_state['marquee_settings'], "welcome")

    # Speech Component at Top Level
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="")
    if val and val != st.session_state.last_transcript:
        val_stripped = val.strip().replace('\n', ' ')
        if val_stripped:
            voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
            md_file, audio_file = asyncio.run(save_chat_entry(st.session_state.username, val_stripped, voice))
            if audio_file:
                play_and_download_audio(audio_file)
            st.rerun()

    tab_main = st.radio("Action:", ["🎤 Chat & Voice", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True, key="tab_main")
    useArxiv = st.checkbox("Search ArXiv", True, key="use_arxiv")
    useArxivAudio = st.checkbox("ArXiv Audio", False, key="use_arxiv_audio")
    st.checkbox("Autosend Chat", value=True, key="autosend")
    st.checkbox("Autosearch ArXiv", value=True, key="autosearch")

    # 🎤 Chat & Voice
    if tab_main == "🎤 Chat & Voice":
        st.subheader(f"{START_ROOM} Chat 💬")
        chat_content = asyncio.run(load_chat())
        chat_container = st.container()
        with chat_container:
            numbered_content = "\n".join(f"{i+1}. {line}" for i, line in enumerate(chat_content))
            st.code(numbered_content, language="python")

        message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
        paste_result = paste_image_button("Paste Image 📋", key=f"paste_button_msg_{int(time.time())}")  # Dynamic key to force re-render
        if paste_result.image_data is not None and not st.session_state['paste_trigger']:
            st.session_state['paste_trigger'] = True
            voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
            if isinstance(paste_result.image_data, str):
                st.session_state.message_text = paste_result.image_data
                message = st.text_input(f"Message as {st.session_state.username}", key="message_input_paste", value=st.session_state.message_text)
            else:
                st.image(paste_result.image_data, caption="Pasted Image")
                filename = asyncio.run(save_pasted_image(paste_result.image_data, st.session_state.username))
                if filename:
                    st.session_state.pasted_image_data = filename
                # Allow user to add text to accompany the image
                image_prompt = st.text_input("Add a prompt for Claude (e.g., 'OCR this image')", key="image_prompt")
                if image_prompt:
                    # Run Claude search with both text and image
                    md_file_claude, audio_file_claude, claude_result = asyncio.run(
                        perform_claude_search(image_prompt, st.session_state.username, paste_result.image_data)
                    )
                    if audio_file_claude:
                        play_and_download_audio(audio_file_claude)
                    # Feed Claude result to ArXiv search
                    md_file_arxiv, audio_file_arxiv = asyncio.run(
                        perform_arxiv_search(image_prompt, st.session_state.username, claude_result)
                    )
                    if audio_file_arxiv:
                        play_and_download_audio(audio_file_arxiv)
                    st.session_state.pasted_image_data = None
                    st.session_state['paste_trigger'] = False
                    st.session_state.timer_start = time.time()
                    save_username(st.session_state.username)
                    st.rerun()

        if (message and message != st.session_state.last_message) or (st.session_state.pasted_image_data and not st.session_state['paste_trigger']):
            st.session_state.last_message = message
            col_send, col_claude, col_arxiv = st.columns([1, 1, 1])

            with col_send:
                if st.session_state.autosend or st.button("Send 🚀", key="send_button"):
                    voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
                    if message.strip():
                        md_file, audio_file = asyncio.run(save_chat_entry(st.session_state.username, message, voice, True))
                        if audio_file:
                            play_and_download_audio(audio_file)
                    if st.session_state.pasted_image_data:
                        asyncio.run(save_chat_entry(st.session_state.username, f"Pasted image: {st.session_state.pasted_image_data}", voice))
                        st.session_state.pasted_image_data = None
                    st.session_state.timer_start = time.time()
                    save_username(st.session_state.username)
                    st.rerun()

            with col_claude:
                if st.button("🧠 Claude", key="claude_button"):
                    voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
                    if message.strip():
                        md_file, audio_file, _ = asyncio.run(perform_claude_search(message, st.session_state.username))
                        if audio_file:
                            play_and_download_audio(audio_file)
                    st.session_state.timer_start = time.time()
                    save_username(st.session_state.username)
                    st.rerun()

            with col_arxiv:
                if st.button("🔍 ArXiv", key="arxiv_button"):
                    voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
                    if message.strip():
                        md_file, audio_file = asyncio.run(perform_arxiv_search(message, st.session_state.username))
                        if audio_file:
                            play_and_download_audio(audio_file)
                    st.session_state.timer_start = time.time()
                    save_username(st.session_state.username)
                    st.rerun()

    # 🔍 ArXiv
    elif tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:", key="arxiv_query")
        if q and q != st.session_state.last_query:
            st.session_state.last_query = q
            if st.session_state.autosearch or st.button("🔍 Run", key="arxiv_run"):
                result, papers = asyncio.run(perform_ai_lookup(q, useArxiv=useArxiv, useArxivAudio=useArxivAudio))
                st.markdown(f"### Query: {q}")
                for i, p in enumerate(papers, 1):
                    expander_label = f"{p['title']} | [arXiv Link]({p['url']})"
                    with st.expander(expander_label):
                        with open(p['md_file'], 'r', encoding='utf-8') as f:
                            content = f.read()
                        numbered_content = "\n".join(f"{j+1}. {line}" for j, line in enumerate(content.split('\n')))
                        st.code(numbered_content, language="python")

    # 📚 PDF to Audio
    elif tab_main == "📚 PDF to Audio":
        audio_processor = AudioProcessor()
        pdf_file = st.file_uploader("Choose PDF", "pdf", key="pdf_upload")
        max_pages = st.slider('Pages', 1, 100, 10, key="pdf_pages")
        if pdf_file:
            with st.spinner('Processing...'):
                texts, audios, total = process_pdf(pdf_file, max_pages, st.session_state['tts_voice'], audio_processor)
                for i, text in enumerate(texts):
                    with st.expander(f"Page {i+1}"):
                        st.markdown(text)
                        # Wait for the background thread to produce this page's audio,
                        # but give up after 30s so a failed TTS call cannot hang the loop.
                        deadline = time.time() + 30
                        while i not in audios and time.time() < deadline:
                            time.sleep(0.1)
                        if audios.get(i):
                            st.audio(audios[i])
                            st.markdown(get_download_link(audios[i], "mp3"), unsafe_allow_html=True)
                            voice = FUN_USERNAMES.get(st.session_state.username, "en-US-AriaNeural")
                            asyncio.run(save_chat_entry(st.session_state.username, f"PDF Page {i+1} converted to audio: {audios[i]}", voice))

    # Always Visible Media Gallery
    st.header("📸 Media Gallery")
    all_files = sorted(glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.png") + glob.glob("*.mp4"), key=os.path.getmtime, reverse=True)
    md_files = [f for f in all_files if f.endswith('.md')]
    mp3_files = [f for f in all_files if f.endswith('.mp3')]
    png_files = [f for f in all_files if f.endswith('.png')]
    mp4_files = [f for f in all_files if f.endswith('.mp4')]

    st.subheader("All Submitted Text")
    all_md_content = concatenate_markdown_files()
    with st.expander("View All Markdown Content"):
        st.markdown(all_md_content)

    st.subheader("🎵 Audio (MP3)")
    for mp3 in mp3_files:
        with st.expander(os.path.basename(mp3)):
            st.audio(mp3)
            st.markdown(get_download_link(mp3, "mp3"), unsafe_allow_html=True)

    st.subheader("🖼️ Images (PNG)")
    for png in png_files:
        with st.expander(os.path.basename(png)):
            st.image(png, use_container_width=True)
            st.markdown(get_download_link(png, "png"), unsafe_allow_html=True)

    st.subheader("🎥 Videos (MP4)")
    for mp4 in mp4_files:
        with st.expander(os.path.basename(mp4)):
            st.video(mp4)
            st.markdown(get_download_link(mp4, "mp4"), unsafe_allow_html=True)

    # 🗂️ Sidebar with Dialog and Audio
    st.sidebar.subheader("Voice Settings")
    new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username), key="username_select")
    if new_username != st.session_state.username:
        asyncio.run(save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}", "en-US-AriaNeural"))
        st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
        st.session_state.timer_start = time.time()
        save_username(st.session_state.username)
        st.rerun()

    st.sidebar.markdown("### 💬 Chat Dialog")
    chat_content = asyncio.run(load_chat())
    with st.sidebar.expander("Chat History"):
        numbered_content = "\n".join(f"{i+1}. {line}" for i, line in enumerate(chat_content))
        st.code(numbered_content, language="python")

    st.sidebar.subheader("Vote Totals")
    chat_votes = load_votes(QUOTE_VOTES_FILE)
    image_votes = load_votes(IMAGE_VOTES_FILE)
    for item, count in chat_votes.items():
        st.sidebar.write(f"{item}: {count} votes")
    for image, count in image_votes.items():
        st.sidebar.write(f"{image}: {count} votes")

    st.sidebar.markdown("### 📂 File History")
    for f in all_files[:10]:
        st.sidebar.write(f"{FILE_EMOJIS.get(f.split('.')[-1], '📄')} {os.path.basename(f)}")
    if st.sidebar.button("⬇️ Zip All", key="zip_all"):
        zip_name = create_zip_of_files(md_files, mp3_files, png_files, mp4_files, "latest_query")
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)

    # Refresh Timer in Sidebar
    st.sidebar.subheader("Set Refresh Rate ⏳")
    st.markdown("""
        <style>
        .timer {
            font-size: 24px;
            color: #ffcc00;
            text-align: center;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.1); }
            100% { transform: scale(1); }
        }
        </style>
    """, unsafe_allow_html=True)

    refresh_rate = st.sidebar.slider("Refresh Rate (seconds)", min_value=1, max_value=300, value=st.session_state.refresh_rate, step=1)
    if refresh_rate != st.session_state.refresh_rate:
        st.session_state.refresh_rate = refresh_rate
        st.session_state.timer_start = time.time()
        save_username(st.session_state.username)

    col1, col2, col3 = st.sidebar.columns(3)
    with col1:
        if st.button("🐇 Small (1s)"):
            st.session_state.refresh_rate = 1
            st.session_state.timer_start = time.time()
            save_username(st.session_state.username)
    with col2:
        if st.button("🐢 Medium (5s)"):
            st.session_state.refresh_rate = 5
            st.session_state.timer_start = time.time()
            save_username(st.session_state.username)
    with col3:
        if st.button("🐘 Large (5m)"):
            st.session_state.refresh_rate = 300
            st.session_state.timer_start = time.time()
            save_username(st.session_state.username)

    timer_placeholder = st.sidebar.empty()
    start_time = st.session_state.timer_start
    remaining_time = int(st.session_state.refresh_rate - (time.time() - start_time))
    if remaining_time <= 0:
        st.session_state.timer_start = time.time()
        st.session_state.last_refresh = time.time()
        st.rerun()
    else:
        timer_placeholder.markdown(f"<p class='timer'>⏳ Next refresh in: {remaining_time} seconds</p>", unsafe_allow_html=True)

    # Start WebSocket server in a separate thread
    if not st.session_state.server_running and not st.session_state.server_task:
        st.session_state.server_task = threading.Thread(target=start_websocket_server, daemon=True)
        st.session_state.server_task.start()

if __name__ == "__main__":
    main()
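To try this commit locally, launching with `streamlit run app.py` should be enough, with ANTHROPIC_API_KEY and OPENAI_API_KEY supplied via the environment, a .env file, or Streamlit secrets. Note that components.declare_component("mycomponent", path="mycomponent") expects a mycomponent/ directory with the custom speech component's frontend next to app.py; that directory is not part of this commit, so the speech input will not work until it is added.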