awacke1 committed on
Commit
309d3fb
·
verified ·
1 Parent(s): dd7eb6c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +708 -0
app.py ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import websockets
4
+ import uuid
5
+ from datetime import datetime
6
+ import os
7
+ import random
8
+ import time
9
+ import hashlib
10
+ from PIL import Image
11
+ import glob
12
+ import base64
13
+ import io
14
+ import streamlit.components.v1 as components
15
+ import edge_tts
16
+ from audio_recorder_streamlit import audio_recorder
17
+ import nest_asyncio
18
+ import re
19
+ from streamlit_paste_button import paste_image_button
20
+ import pytz
21
+ import shutil
22
+ import anthropic
23
+ import openai
24
+ from PyPDF2 import PdfReader
25
+ import threading
26
+ import json
27
+ import zipfile
28
+ from gradio_client import Client
29
+ from dotenv import load_dotenv
30
+ from streamlit_marquee import streamlit_marquee
31
+ from collections import defaultdict, Counter
32
+ import pandas as pd
33
+
34
+ # 🛠️ Patch asyncio for nesting
35
nest_asyncio.apply()

# 🎨 Page Config
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
)

# 🌟 Static Config
icons = '🤖🧠🔬📝'
Site_Name = '🤖🧠Chat & Quote Node📝🔬'
START_ROOM = "Sector 🌌"

# Display name -> edge-tts voice used when that user's messages are spoken.
FUN_USERNAMES = {
    "CosmicJester 🌌": "en-US-AriaNeural",
    "PixelPanda 🐼": "en-US-JennyNeural",
    "QuantumQuack 🦆": "en-GB-SoniaNeural",
    "StellarSquirrel 🐿️": "en-AU-NatashaNeural",
    "GizmoGuru ⚙️": "en-CA-ClaraNeural",
    "NebulaNinja 🌠": "en-US-GuyNeural",
    "ByteBuster 💾": "en-GB-RyanNeural",
    "GalacticGopher 🌍": "en-AU-WilliamNeural",
    "RocketRaccoon 🚀": "en-CA-LiamNeural",
    "EchoElf 🧝": "en-US-AnaNeural",
    "PhantomFox 🦊": "en-US-BrandonNeural",
    "WittyWizard 🧙": "en-GB-ThomasNeural",
    "LunarLlama 🌙": "en-AU-FreyaNeural",
    "SolarSloth ☀️": "en-CA-LindaNeural",
    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
    "CyberCoyote 🐺": "en-GB-ElliotNeural",
    "MysticMoose 🦌": "en-AU-JamesNeural",
    "GlitchGnome 🧚": "en-CA-EthanNeural",
    "VortexViper 🐍": "en-US-AmberNeural",
    "ChronoChimp 🐒": "en-GB-LibbyNeural",
}
# De-duplicated voice names; order is not significant.
EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
# File extension -> emoji used in listings and download links.
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "png": "🖼️",
    "mp4": "🎥",
}

# 📁 Directories (Media at Root)
CHAT_DIR = "chat_logs"
VOTE_DIR = "vote_logs"
MEDIA_DIR = "."
AUDIO_CACHE_DIR = "audio_cache"
AUDIO_DIR = "audio_logs"
STATE_FILE = "user_state.txt"

# Create the working directories up front so later file writes cannot fail
# on a missing parent folder.
for d in (CHAT_DIR, VOTE_DIR, AUDIO_DIR, "history_logs", AUDIO_CACHE_DIR):
    os.makedirs(d, exist_ok=True)

CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
IMAGE_VOTES_FILE = os.path.join(VOTE_DIR, "image_votes.md")
HISTORY_FILE = os.path.join(VOTE_DIR, "vote_history.md")

# 🔑 API Keys
# Environment variables win; Streamlit secrets are the fallback value.
load_dotenv()
anthropic_key = os.getenv('ANTHROPIC_API_KEY', st.secrets.get('ANTHROPIC_API_KEY', ""))
openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ""))
openai_client = openai.OpenAI(api_key=openai_api_key)
95
+
96
+ # 🕒 Timestamp Helper
97
def format_timestamp_prefix(username=""):
    """Return the ``YYYYmmdd_HHMMSS-by-<username>`` prefix used for file names.

    The current time is read in the US/Central time zone.
    """
    central_now = datetime.now(pytz.timezone('US/Central'))
    stamp = central_now.strftime('%Y%m%d_%H%M%S')
    return f"{stamp}-by-{username}"
101
+
102
+ # 📈 Performance Timer
103
class PerformanceTimer:
    """Context manager that records how long the wrapped block took.

    On exit the elapsed wall-clock seconds are stored under ``name`` in
    ``st.session_state['operation_timings']`` and appended to the list kept
    under ``name`` in ``st.session_state['performance_metrics']``.
    """

    def __init__(self, name):
        self.name = name
        self.start = None

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        elapsed = time.time() - self.start
        state = st.session_state
        state['operation_timings'][self.name] = elapsed
        state['performance_metrics'][self.name].append(elapsed)
113
+
114
+ # 🎛️ Session State Init
115
def init_session_state():
    """Seed ``st.session_state`` with default values for every key the app uses.

    Keys that are already present are left untouched.
    """
    marquee_defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
    defaults = {
        'server_running': False,
        'server_task': None,
        'active_connections': {},
        'media_notifications': [],
        'last_chat_update': 0,
        'displayed_chat_lines': [],
        'message_text': "",
        'audio_cache': {},
        'pasted_image_data': None,
        'quote_line': None,
        'refresh_rate': 5,
        'base64_cache': {},
        'transcript_history': [],
        'last_transcript': "",
        'image_hashes': set(),
        'tts_voice': "en-US-AriaNeural",
        'chat_history': [],
        'marquee_settings': marquee_defaults,
        'operation_timings': {},
        'performance_metrics': defaultdict(list),
        'enable_audio': True,
        'download_link_cache': {},
        'username': None,
        'autosend': True,
        'autosearch': True,
        'last_message': "",
        'last_query': "",
        'mp3_files': {},
        'timer_start': time.time(),
        'quote_index': 0,
        'quote_source': "famous",
        'last_sent_transcript': "",
    }
    for key in defaults:
        if key in st.session_state:
            continue
        st.session_state[key] = defaults[key]
134
+
135
+ # 🖌️ Marquee Helpers
136
def update_marquee_settings_ui():
    """Render the sidebar marquee controls and store their values.

    Two colour pickers and two sliders are laid out in two sidebar columns;
    each widget's value is written into ``st.session_state['marquee_settings']``.
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left, right = st.sidebar.columns(2)
    settings = st.session_state['marquee_settings']
    with left:
        settings['background'] = st.color_picker("🎨 Background", "#1E1E1E")
        settings['color'] = st.color_picker("✍️ Text", "#FFFFFF")
    with right:
        size = st.slider('📏 Size', 10, 24, 14)
        settings['font-size'] = f"{size}px"
        speed = st.slider('⏱️ Speed', 1, 20, 20)
        settings['animationDuration'] = f"{speed}s"
145
+
146
def display_marquee(text, settings, key_suffix=""):
    """Show ``text`` in a scrolling marquee, cut to 280 characters plus "...".

    ``settings`` is expanded into keyword arguments of ``streamlit_marquee``;
    ``key_suffix`` makes the widget key unique.
    """
    if len(text) > 280:
        shown = text[:280] + "..."
    else:
        shown = text
    streamlit_marquee(content=shown, **settings, key=f"marquee_{key_suffix}")
    st.write("")
150
+
151
+ # 📝 Text & File Helpers
152
def clean_text_for_tts(text):
    """Normalise ``text`` for speech synthesis.

    Whitespace runs are collapsed to single spaces, the markdown characters
    ``# * ! [ ]`` are removed, and the result is cut to 200 characters.
    Returns ``"No text"`` when nothing is left.
    """
    collapsed = ' '.join(text.split())
    stripped = re.sub(r'[#*!\[\]]+', '', collapsed)
    snippet = stripped[:200]
    return snippet if snippet else "No text"
154
+
155
def clean_text_for_filename(text):
    """Turn ``text`` into a lowercase, underscore-separated file-name fragment.

    Characters other than word characters, whitespace and hyphens are
    dropped; the result is limited to 200 characters.
    """
    lowered = text.lower()
    kept = re.sub(r'[^\w\s-]', '', lowered)
    joined = '_'.join(kept.split())
    return joined[:200]
157
+
158
def get_high_info_terms(text, top_n=10):
    """Return the ``top_n`` most frequent words and two-word phrases in ``text``.

    Matching is case-insensitive. Single words found in a small stop-word
    list are ignored; adjacent word pairs are counted alongside single words.
    """
    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
    tokens = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    candidates = list(tokens)
    candidates.extend(' '.join(pair) for pair in zip(tokens, tokens[1:]))
    kept = []
    for term in candidates:
        if term in stop_words:
            continue
        if len(term.split()) > 2:
            continue
        kept.append(term)
    ranked = Counter(kept).most_common(top_n)
    return [term for term, _count in ranked]
164
+
165
def generate_filename(prompt, username, file_type="md"):
    """Build ``<timestamp prefix>-<8 hex chars of md5(prompt)>.<file_type>``."""
    digest = hashlib.md5(prompt.encode()).hexdigest()
    prefix = format_timestamp_prefix(username)
    return f"{prefix}-{digest[:8]}.{file_type}"
169
+
170
def create_file(prompt, username, file_type="md"):
    """Write ``prompt`` as UTF-8 to a freshly named file and return its name.

    The name comes from ``generate_filename``; the file is created in the
    current working directory.
    """
    target = generate_filename(prompt, username, file_type)
    with open(target, 'w', encoding='utf-8') as out:
        out.write(prompt)
    return target
175
+
176
def get_download_link(file, file_type="mp3"):
    """Return an HTML ``<a>`` tag that downloads ``file`` via a base64 data URI.

    Args:
        file: Path of the file to embed.
        file_type: Short type key ("mp3", "png", "mp4", "md", "zip") used to
            pick the MIME type and the emoji label; unknown keys fall back to
            ``application/octet-stream`` and the label "Download".

    Returns:
        The anchor markup. Results are memoised per (file, file_type) in
        ``st.session_state['download_link_cache']``.
    """
    import html  # local import: only needed here, keeps the block self-contained

    cache = st.session_state['download_link_cache']
    # Include the type in the key so the same file requested with a different
    # type does not get a link with the wrong MIME type.
    cache_key = f"dl_{file}_{file_type}"
    if cache_key not in cache:
        with open(file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        mime_types = {
            "mp3": "audio/mpeg",
            "png": "image/png",
            "mp4": "video/mp4",
            "md": "text/markdown",
            "zip": "application/zip",
        }
        mime = mime_types.get(file_type, "application/octet-stream")
        # File names embed user-supplied names, and the markup is rendered with
        # unsafe_allow_html=True, so the name must be escaped.
        safe_name = html.escape(os.path.basename(file), quote=True)
        label = FILE_EMOJIS.get(file_type, "Download")
        cache[cache_key] = (
            f'<a href="data:{mime};base64,{b64}" download="{safe_name}">'
            f'{label} Download {safe_name}</a>'
        )
    return cache[cache_key]
184
+
185
def save_username(username):
    """Persist ``username`` to ``STATE_FILE`` (best effort).

    The file is written as UTF-8 because the display names contain emoji,
    which cannot be encoded under some platform default encodings.
    Failures are printed and otherwise ignored.
    """
    try:
        with open(STATE_FILE, 'w', encoding='utf-8') as f:
            f.write(username)
    except Exception as e:
        print(f"Failed to save username: {e}")
191
+
192
def load_username():
    """Return the username stored in ``STATE_FILE``, or ``None``.

    ``None`` is returned when the file does not exist or cannot be read; read
    errors are printed. The file is decoded as UTF-8 because the display
    names contain emoji.
    """
    if not os.path.exists(STATE_FILE):
        return None
    try:
        with open(STATE_FILE, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except Exception as e:
        print(f"Failed to load username: {e}")
        return None
200
+
201
def concatenate_markdown_files():
    """Join every ``*.md`` file in the working directory into one string.

    Files are ordered newest first by modification time, each followed by a
    horizontal-rule separator; surrounding whitespace is stripped from the
    combined text.
    """
    newest_first = sorted(glob.glob("*.md"), key=os.path.getmtime, reverse=True)
    chunks = []
    for path in newest_first:
        with open(path, 'r', encoding='utf-8') as handle:
            chunks.append(handle.read() + "\n\n---\n\n")
    return "".join(chunks).strip()
208
+
209
+ # 🎶 Audio Processing
210
async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
    """Synthesize ``text`` with edge-tts and return ``(audio_path, seconds)``.

    Args:
        text: Raw text; it is passed through ``clean_text_for_tts`` first.
        voice: edge-tts voice name.
        username: Embedded in the generated file name.
        rate: Integer speaking-rate offset in percent.
        pitch: Integer pitch offset in Hz.
        file_format: Extension of the output file.

    Returns:
        ``(path, elapsed_seconds)``. On a cache hit the cached path is
        returned with an elapsed time of 0. ``(None, 0)`` is returned when
        the cleaned text is empty.
    """
    start_time = time.time()
    text = clean_text_for_tts(text)
    if not text:
        return None, 0
    # Key the cache on the exact text that will be spoken. Keying on a raw
    # prefix made different messages with a shared opening reuse one clip.
    text_digest = hashlib.md5(text.encode()).hexdigest()
    cache_key = f"{text_digest}_{voice}_{rate}_{pitch}_{file_format}"
    cache = st.session_state['audio_cache']
    cached_path = cache.get(cache_key)
    # Only trust the cache while the file is still on disk.
    if cached_path and os.path.exists(cached_path):
        return cached_path, 0
    filename = f"{format_timestamp_prefix(username)}-{text_digest[:8]}.{file_format}"
    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
    await communicate.save(filename)
    cache[cache_key] = filename
    return filename, time.time() - start_time
223
+
224
def play_and_download_audio(file_path):
    """Render an audio player and a download link for ``file_path``.

    Does nothing when the path is empty or the file does not exist.
    """
    if not file_path:
        return
    if not os.path.exists(file_path):
        return
    st.audio(file_path)
    link_html = get_download_link(file_path)
    st.markdown(link_html, unsafe_allow_html=True)
228
+
229
def load_mp3_viewer():
    """Register every ``*.mp3`` in the working directory in session state.

    Files are visited newest first; base names already present in
    ``st.session_state['mp3_files']`` keep their existing entry.
    """
    known = st.session_state['mp3_files']
    for path in sorted(glob.glob("*.mp3"), key=os.path.getmtime, reverse=True):
        base = os.path.basename(path)
        if base in known:
            continue
        known[base] = path
235
+
236
async def save_chat_entry(username, message, is_markdown=False):
    """Append a chat entry, speak it, and broadcast it to connected clients.

    Args:
        username: Author name; also selects the TTS voice via
            ``FUN_USERNAMES`` (falling back to "en-US-AriaNeural").
        message: Message body.
        is_markdown: When true the message is stored inside a fenced
            ``markdown`` code block.

    Returns:
        Path of the generated audio file, or ``None`` if none was produced.
    """
    central = pytz.timezone('US/Central')
    timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
    if is_markdown:
        entry = f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
    else:
        entry = f"[{timestamp}] {username}: {message}"
    # Names and messages contain emoji, so write UTF-8 explicitly instead of
    # relying on the platform default encoding.
    with open(CHAT_FILE, 'a', encoding='utf-8') as f:
        f.write(f"{entry}\n")
    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    audio_file, _ = await async_edge_tts_generate(message, voice, username)
    if audio_file:
        with open(HISTORY_FILE, 'a', encoding='utf-8') as f:
            f.write(f"[{timestamp}] {username}: Audio - {audio_file}\n")
        st.session_state['mp3_files'][os.path.basename(audio_file)] = audio_file
    await broadcast_message(f"{username}|{message}", "chat")
    st.session_state.last_chat_update = time.time()
    st.session_state.chat_history.append(entry)
    return audio_file
252
+
253
async def load_chat():
    """Return the chat log as numbered lines.

    The log file is created with a welcome header when missing. Blank lines
    are dropped; numbering follows each line's position in the file, so
    numbers may skip where blank lines were removed.
    """
    # The header and later entries contain emoji; read and write UTF-8
    # explicitly instead of relying on the platform default encoding.
    if not os.path.exists(CHAT_FILE):
        with open(CHAT_FILE, 'a', encoding='utf-8') as f:
            f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🎤\n")
    with open(CHAT_FILE, 'r', encoding='utf-8') as f:
        content = f.read().strip()
    lines = content.split('\n')
    numbered_content = "\n".join(f"{i+1}. {line}" for i, line in enumerate(lines) if line.strip())
    return numbered_content
262
+
263
+ # 🌐 WebSocket Handling
264
async def websocket_handler(websocket, path=None):
    """Serve one WebSocket client of the "chat" room.

    Args:
        websocket: The client connection.
        path: Request path. Optional so the handler works with websockets
            versions that pass it as well as versions that call the handler
            with the connection only.

    The connection is registered in ``st.session_state.active_connections``,
    a join notice is written, and every ``username|content`` message is
    stored via ``save_chat_entry``. Messages without ``|`` get an error
    reply. The connection is unregistered when the handler exits.
    """
    client_id = str(uuid.uuid4())
    room_id = "chat"
    if room_id not in st.session_state.active_connections:
        st.session_state.active_connections[room_id] = {}
    st.session_state.active_connections[room_id][client_id] = websocket
    # The 'username' key exists with value None until a name is picked, so a
    # dict-style default would never apply; fall back on any falsy value.
    username = st.session_state.get('username') or random.choice(list(FUN_USERNAMES.keys()))
    chat_content = await load_chat()
    if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
        await save_chat_entry("System 🌟", f"{username} has joined {START_ROOM}!")
    try:
        async for message in websocket:
            if '|' in message:
                username, content = message.split('|', 1)
                await save_chat_entry(username, content)
            else:
                await websocket.send("ERROR|Message format: username|content")
    except websockets.ConnectionClosed:
        await save_chat_entry("System 🌟", f"{username} has left {START_ROOM}!")
    finally:
        room = st.session_state.active_connections.get(room_id, {})
        room.pop(client_id, None)
286
+
287
async def broadcast_message(message, room_id):
    """Send ``message`` to every client registered for ``room_id``.

    Clients whose connection turns out to be closed are removed from
    ``st.session_state.active_connections[room_id]``. Unknown rooms are
    ignored.
    """
    if room_id not in st.session_state.active_connections:
        return
    room = st.session_state.active_connections[room_id]
    disconnected = []
    # Iterate over a snapshot: each send awaits, and a handler may add or
    # remove a connection meanwhile, which would otherwise raise
    # "dictionary changed size during iteration".
    for client_id, ws in list(room.items()):
        try:
            await ws.send(message)
        except websockets.ConnectionClosed:
            disconnected.append(client_id)
    for client_id in disconnected:
        room.pop(client_id, None)
298
+
299
async def run_websocket_server():
    """Start the chat WebSocket server on 0.0.0.0:8765 and wait until it closes.

    Returns immediately when ``st.session_state.server_running`` is already
    set; otherwise the flag is set once the server is listening.
    """
    if st.session_state.server_running:
        return
    server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
    st.session_state.server_running = True
    await server.wait_closed()
304
+
305
+ # 📚 PDF to Audio
306
class AudioProcessor:
    """Text-to-speech helper with an on-disk MP3 cache.

    Audio files are stored in ``AUDIO_CACHE_DIR`` under the MD5 of
    ``"<text>:<voice>"``; a ``metadata.json`` file in the same directory
    records which keys have been generated.
    """

    def __init__(self):
        self.cache_dir = AUDIO_CACHE_DIR
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    def _metadata_path(self):
        """Return the path of the cache index file."""
        return f"{self.cache_dir}/metadata.json"

    def _load_metadata(self):
        """Read the cache index; return ``{}`` when it is missing or corrupt."""
        path = self._metadata_path()
        if not os.path.exists(path):
            return {}
        try:
            # The context manager closes the handle, unlike a bare open().
            with open(path) as f:
                return json.load(f)
        except json.JSONDecodeError as e:
            # A damaged index only costs re-synthesis; do not crash the app.
            print(f"Ignoring unreadable audio cache metadata: {e}")
            return {}

    def _save_metadata(self):
        """Write the cache index back to disk."""
        with open(self._metadata_path(), 'w') as f:
            json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        """Return the path of an MP3 of ``text`` spoken by ``voice``.

        A cached file is reused when its key is in the index and the file
        exists. Returns ``None`` when the cleaned text is empty.
        """
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = f"{self.cache_dir}/{cache_key}.mp3"
        if cache_key in self.metadata and os.path.exists(cache_path):
            return cache_path
        text = clean_text_for_tts(text)
        if not text:
            return None
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)
        self.metadata[cache_key] = {
            'timestamp': datetime.now().isoformat(),
            'text_length': len(text),
            'voice': voice,
        }
        self._save_metadata()
        return cache_path
329
+
330
def process_pdf(pdf_file, max_pages, voice, audio_processor):
    """Extract page texts from a PDF and start audio generation for each page.

    Args:
        pdf_file: File-like object or path accepted by ``PdfReader``.
        max_pages: Upper bound on the number of pages processed.
        voice: Voice name passed to ``audio_processor.create_audio``.
        audio_processor: Object providing an async ``create_audio(text, voice)``.

    Returns:
        ``(texts, audios, total_pages)``. ``texts`` is complete on return.
        ``audios`` maps page index to an audio path and is filled in by
        background threads; a page whose audio could not be produced is
        recorded as ``None`` so callers polling for the key always finish.
    """
    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}

    async def process_page(i, text):
        audio_path = None
        try:
            audio_path = await audio_processor.create_audio(text, voice)
        except Exception as e:
            print(f"Audio generation failed for page {i + 1}: {e}")
        finally:
            # Always record an outcome, even on failure.
            audios[i] = audio_path or None

    def run_page(i, text):
        asyncio.run(process_page(i, text))

    for i in range(total_pages):
        # extract_text() may yield None for pages without a text layer.
        text = reader.pages[i].extract_text() or ""
        texts.append(text)
        # Pass the values as thread arguments. A lambda closing over the loop
        # variables would see whatever page the loop had reached by the time
        # the thread actually ran.
        threading.Thread(target=run_page, args=(i, text)).start()
    return texts, audios, total_pages
343
+
344
+ # 🔍 ArXiv & AI Lookup
345
def parse_arxiv_refs(ref_text):
    """Parse a reference listing into at most 20 paper dictionaries.

    A line containing exactly two ``|`` characters starts a new paper and
    supplies its date and title; an arXiv URL on that line becomes the paper
    URL, otherwise a ``paper_<n>`` placeholder is used. The first following
    line is taken as the authors, and further lines are joined with spaces
    into the summary. Lines before the first header are ignored.
    """
    if not ref_text:
        return []
    url_pattern = r'(https://arxiv.org/\S+)'
    papers = []
    current = {}
    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            # Header line: flush the paper collected so far, start a new one.
            if current:
                papers.append(current)
            fields = line.strip('* ').split('|')
            date, title = fields[0], fields[1]
            found = re.search(url_pattern, line)
            url = found.group(1) if found else f"paper_{len(papers)}"
            current = {
                'date': date,
                'title': title,
                'url': url,
                'authors': '',
                'summary': '',
                'full_audio': None,
                'download_base64': '',
            }
            continue
        if not current:
            continue
        if not current['authors']:
            current['authors'] = line.strip('* ')
        elif current['summary']:
            current['summary'] += ' ' + line.strip()
        else:
            current['summary'] += line.strip()
    if current:
        papers.append(current)
    return papers[:20]
365
+
366
def generate_5min_feature_markdown(paper):
    """Render one paper dictionary as a markdown feature section.

    Args:
        paper: Mapping with the keys ``title``, ``summary``, ``authors``,
            ``date`` and ``url``.

    Returns:
        Markdown containing the title, metadata, abstract/PDF links, the top
        terms of the summary, a term-density percentage (labelled "ROUGE"
        in the output) and a mermaid flowchart chaining consecutive terms.
    """
    title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
    pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
    wct, sw = len(title.split()), len(summary.split())
    terms = get_high_info_terms(summary, 15)
    # max(sw, 1) avoids division by zero for an empty summary.
    rouge = round((len(terms) / max(sw, 1)) * 100, 2)
    # Pair each term with its successor. The label of the source node must
    # come from the pair itself: the previous code referenced a name that was
    # never bound and raised NameError as soon as there were two terms.
    edges = "\n".join(
        f' T{i+1}["{src}"] --> T{i+2}["{dst}"]'
        for i, (src, dst) in enumerate(zip(terms, terms[1:]))
    )
    mermaid = "```mermaid\nflowchart TD\n" + edges + "\n```"
    return f"""
## 📄 {title}
**Authors:** {authors} | **Date:** {date} | **Words:** Title: {wct}, Summary: {sw}
**Links:** [Abstract]({url}) | [PDF]({pdf_url})
**Terms:** {', '.join(terms)} | **ROUGE:** {rouge}%
### 🎤 TTF Read Aloud
- **Title:** {title} | **Terms:** {', '.join(terms)} | **ROUGE:** {rouge}%
#### Concepts Graph
{mermaid}
---
"""
384
+
385
def create_detailed_paper_md(papers):
    """Return a "Detailed Summary" markdown document with one section per paper."""
    sections = [generate_5min_feature_markdown(paper) for paper in papers]
    return "# Detailed Summary\n" + "\n".join(sections)
387
+
388
async def create_paper_audio_files(papers, query):
    """Generate a spoken summary for each paper and attach it in place.

    For every paper dictionary, ``full_audio`` is set to the generated audio
    path (or ``None``) and, when audio exists, ``download_base64`` to its
    download link markup.

    Args:
        papers: List of paper dictionaries with ``title``, ``authors`` and
            ``summary`` keys.
        query: Unused here; kept for interface compatibility.
    """
    for p in papers:
        audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
        # The author string becomes part of the audio file name. Raw author
        # lists can be very long or contain characters such as "/", so reduce
        # them to a short, file-system-safe tag first.
        author_tag = clean_text_for_filename(p['authors'])[:50]
        p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], author_tag)
        if p['full_audio']:
            p['download_base64'] = get_download_link(p['full_audio'])
394
+
395
async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
    """Answer ``q`` with Claude and optionally extend it with an ArXiv search.

    The Claude reply is shown, saved to a markdown file and spoken. When
    ``useArxiv`` is set, the query plus reply is sent to the ArXiv RAG Gradio
    space; the combined result is saved and spoken too, and the returned
    references are parsed into papers. ``useArxivAudio`` additionally
    generates audio for each paper.

    Returns:
        ``(result_text, papers)``; ``papers`` is empty without ArXiv search.
    """
    claude = anthropic.Anthropic(api_key=anthropic_key)
    reply = claude.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}],
    )
    result = reply.content[0].text
    st.markdown("### Claude's Reply 🧠\n" + result)
    create_file(result, "System", "md")
    spoken, _ = await async_edge_tts_generate(result, st.session_state['tts_voice'], "System")
    play_and_download_audio(spoken)

    if not useArxiv:
        return result, []

    # The reply is appended to the query before searching.
    q += result
    gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    prediction = gradio_client.predict(
        q,
        10,
        "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md",
    )
    refs = prediction[0]
    result = f"🔎 {q}\n\n{refs}"
    create_file(result, "System", "md")
    generated = await async_edge_tts_generate(result, st.session_state['tts_voice'], "System")
    play_and_download_audio(generated[0])
    papers = parse_arxiv_refs(refs)
    if papers and useArxivAudio:
        await create_paper_audio_files(papers, q)
    return result, papers
416
+
417
def save_vote(file, item, user_hash):
    """Record that ``user_hash`` voted for ``item``.

    The vote is appended to ``file`` and mirrored into ``HISTORY_FILE``.

    Returns:
        ``True`` on success, ``False`` when writing failed (the error is
        printed).
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{timestamp}] {user_hash} voted for {item}"
    try:
        # Voted items are chat lines that contain emoji, so write UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(file, 'a', encoding='utf-8') as f:
            f.write(f"{entry}\n")
        with open(HISTORY_FILE, 'a', encoding='utf-8') as f:
            f.write(f"- {timestamp} - User {user_hash} voted for {item}\n")
        return True
    except Exception as e:
        print(f"Vote save flop: {e}")
        return False
429
+
430
def load_votes(file):
    """Count the votes recorded in ``file``.

    The file is created with a short header when it does not exist.

    Returns:
        Dict mapping each voted item to its number of votes; ``{}`` when the
        file cannot be read (the error is printed).
    """
    if not os.path.exists(file):
        with open(file, 'w', encoding='utf-8') as f:
            f.write("# Vote Tally\n\nNo votes yet - get clicking! 🖱️\n")
    try:
        with open(file, 'r', encoding='utf-8') as f:
            lines = f.read().strip().split('\n')
    except Exception as e:
        print(f"Vote load oopsie: {e}")
        return {}
    votes = {}
    # Scan every line. Header lines never contain the marker, whereas
    # skipping a fixed number of lines dropped real votes whenever the file
    # had been created without a header.
    for line in lines:
        _, marker, item = line.partition('voted for ')
        if not marker:
            continue
        # partition keeps everything after the first marker, so items that
        # themselves contain "voted for " are not truncated.
        votes[item] = votes.get(item, 0) + 1
    return votes
446
+
447
def generate_user_hash():
    """Return this session's 8-character voter id, creating it on first use.

    The id is the start of the MD5 hex digest of a random 128-bit number and
    is kept in ``st.session_state['user_hash']``.
    """
    if 'user_hash' in st.session_state:
        return st.session_state['user_hash']
    seed = str(random.getrandbits(128))
    digest = hashlib.md5(seed.encode()).hexdigest()
    st.session_state['user_hash'] = digest[:8]
    return st.session_state['user_hash']
453
+
454
async def save_pasted_image(image, username):
    """Save a pasted image as PNG unless the same pixels were saved before.

    Duplicates are detected by an 8-character MD5 prefix of the raw image
    bytes, tracked in ``st.session_state.image_hashes``.

    Returns:
        The new file path, or ``None`` for a duplicate.
    """
    digest = hashlib.md5(image.tobytes()).hexdigest()[:8]
    seen = st.session_state.image_hashes
    if digest in seen:
        return None
    filepath = f"{format_timestamp_prefix(username)}-{digest}.png"
    image.save(filepath, "PNG")
    seen.add(digest)
    return filepath
464
+
465
+ # 📦 Zip Files
466
def create_zip_of_files(md_files, mp3_files, png_files, mp4_files, query):
    """Bundle the given files into a zip archive named after their content.

    The archive name combines the timestamp prefix with up to 20 characters
    of the top terms found in the markdown contents, the other files' base
    names and ``query``.

    Returns:
        The archive file name, or ``None`` when no files were given.
    """
    all_files = md_files + mp3_files + png_files + mp4_files
    if not all_files:
        return None
    snippets = []
    for path in all_files:
        if path.endswith('.md'):
            # Read inside a context manager so the handle is closed promptly.
            with open(path, 'r', encoding='utf-8') as f:
                snippets.append(f.read())
        else:
            stem = os.path.splitext(os.path.basename(path))[0]
            snippets.append(stem.replace('_', ' '))
    snippets.append(query)
    terms = get_high_info_terms(" ".join(snippets), 5)
    zip_name = f"{format_timestamp_prefix()}_{'-'.join(terms)[:20]}.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for path in all_files:
            z.write(path)
    return zip_name
475
+
476
+ # 🎮 Main Interface
477
async def async_interface():
    """Render the whole Streamlit page for one script run.

    Steps, in order: initialise session state, restore or pick a username,
    draw the title and marquee, schedule the WebSocket server task, render
    the selected tab (chat and voice, media gallery, ArXiv search, or PDF to
    audio), then the sidebar (voice selector, recent chat with matching
    media, vote totals, file history, zip download).
    """
    init_session_state()
    load_mp3_viewer()
    saved_username = load_username()
    if saved_username and saved_username in FUN_USERNAMES:
        st.session_state.username = saved_username
    if not st.session_state.username:
        # Read the chat log once instead of once per candidate name.
        chat_lines = (await load_chat()).split('\n')
        available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in chat_lines)]
        st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
        st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
        await save_chat_entry("System 🌟", f"{st.session_state.username} has joined {START_ROOM}!")
    save_username(st.session_state.username)

    st.title(f"{Site_Name} for {st.session_state.username}")
    update_marquee_settings_ui()
    display_marquee(f"🚀 Welcome to {START_ROOM} | 🤖 {st.session_state.username}", st.session_state['marquee_settings'], "welcome")

    # NOTE(review): the task is created on the loop of this script run; it
    # presumably does not outlive that loop - confirm the server lifetime.
    if not st.session_state.server_task:
        st.session_state.server_task = asyncio.create_task(run_websocket_server())

    tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "🔍 ArXiv", "📚 PDF to Audio"], horizontal=True)
    useArxiv, useArxivAudio = st.checkbox("Search ArXiv", True), st.checkbox("ArXiv Audio", False)
    st.session_state.autosend = st.checkbox("Autosend Chat", value=True)
    st.session_state.autosearch = st.checkbox("Autosearch ArXiv", value=True)

    # 🎤 Chat & Voice
    if tab_main == "🎤 Chat & Voice":
        st.subheader(f"{START_ROOM} Chat 💬")
        chat_content = await load_chat()
        chat_container = st.container()
        with chat_container:
            lines = chat_content.split('\n')
            for i, line in enumerate(lines):
                if line.strip():
                    col1, col2 = st.columns([5, 1])
                    with col1:
                        st.markdown(line)
                        # Attach the first clip whose file name contains the
                        # current username and any word of this chat line.
                        for mp3_name, mp3_path in st.session_state['mp3_files'].items():
                            if st.session_state.username in mp3_name and any(word in mp3_name for word in line.split()):
                                st.audio(mp3_path)
                                break
                    with col2:
                        if st.button(f"👍", key=f"chat_vote_{i}"):
                            user_hash = generate_user_hash()
                            save_vote(QUOTE_VOTES_FILE, line, user_hash)
                            st.session_state.timer_start = time.time()
                            save_username(st.session_state.username)
                            st.rerun()

        message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
        paste_result = paste_image_button("📋 Paste Image or Text", key="paste_button_msg")
        if paste_result.image_data is not None:
            if isinstance(paste_result.image_data, str):
                st.session_state.message_text = paste_result.image_data
                message = st.text_input(f"Message as {st.session_state.username}", key="message_input_paste", value=st.session_state.message_text)
            else:
                st.image(paste_result.image_data, caption="Pasted Image")
                filename = await save_pasted_image(paste_result.image_data, st.session_state.username)
                if filename:
                    st.session_state.pasted_image_data = filename
        if (message and message != st.session_state.last_message) or st.session_state.pasted_image_data:
            st.session_state.last_message = message
            if st.session_state.autosend or st.button("Send 🚀"):
                if message.strip():
                    await save_chat_entry(st.session_state.username, message, True)
                if st.session_state.pasted_image_data:
                    await save_chat_entry(st.session_state.username, f"Pasted image: {st.session_state.pasted_image_data}")
                    st.session_state.pasted_image_data = None
                st.session_state.timer_start = time.time()
                save_username(st.session_state.username)
                st.rerun()

        st.subheader("🎤 Speech-to-Chat")
        from mycomponent import mycomponent
        transcript_data = mycomponent(default_value=st.session_state.get('last_transcript', ''), key="speech_input")
        if transcript_data and 'value' in transcript_data:
            transcript = transcript_data['value'].strip()
            if transcript and transcript != st.session_state.get('last_transcript', ''):
                st.session_state.last_transcript = transcript
        else:
            transcript = st.session_state.get('last_transcript', '')

        edited_transcript = st.text_area("✏️ Edit Transcript:", value=transcript, height=100, key="transcript_input")
        if transcript:
            # The emoji in this label was mis-encoded in the source; restored.
            st.write(f"🎙️ You said: {transcript}")

        if edited_transcript and edited_transcript != st.session_state.get('last_sent_transcript', ''):
            if st.session_state.autosend:
                await save_chat_entry(st.session_state.username, edited_transcript, True)
                st.session_state.last_sent_transcript = edited_transcript
                st.session_state.timer_start = time.time()
                save_username(st.session_state.username)
                with chat_container:
                    st.markdown(await load_chat())
            elif st.button("Send to Chat", key="send_transcript"):
                await save_chat_entry(st.session_state.username, edited_transcript, True)
                st.session_state.last_sent_transcript = edited_transcript
                st.session_state.timer_start = time.time()
                save_username(st.session_state.username)
                st.rerun()
        if not transcript:
            st.write("🎙️ Speak to transcribe your message...")

    # 📸 Media
    elif tab_main == "📸 Media":
        st.header("📸 Media Gallery")
        all_files = sorted(glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.png") + glob.glob("*.mp4"), key=os.path.getmtime, reverse=True)
        md_files = [f for f in all_files if f.endswith('.md')]
        mp3_files = [f for f in all_files if f.endswith('.mp3')]
        png_files = [f for f in all_files if f.endswith('.png')]
        mp4_files = [f for f in all_files if f.endswith('.mp4')]

        st.subheader("All Submitted Text")
        all_md_content = concatenate_markdown_files()
        st.markdown(all_md_content)

        st.subheader("🎵 Audio (MP3)")
        for mp3 in mp3_files:
            with st.expander(os.path.basename(mp3)):
                st.audio(mp3)
                st.markdown(get_download_link(mp3, "mp3"), unsafe_allow_html=True)

        st.subheader("🖼️ Images (PNG)")
        for png in png_files:
            with st.expander(os.path.basename(png)):
                st.image(png, use_container_width=True)
                st.markdown(get_download_link(png, "png"), unsafe_allow_html=True)

        st.subheader("🎥 Videos (MP4)")
        for mp4 in mp4_files:
            with st.expander(os.path.basename(mp4)):
                st.video(mp4)
                st.markdown(get_download_link(mp4, "mp4"), unsafe_allow_html=True)

        uploaded_file = st.file_uploader("Upload Media", type=['png', 'mp4', 'mp3'])
        if uploaded_file:
            filename = f"{format_timestamp_prefix(st.session_state.username)}-{hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]}.{uploaded_file.name.split('.')[-1]}"
            with open(filename, 'wb') as f:
                f.write(uploaded_file.getbuffer())
            await save_chat_entry(st.session_state.username, f"Uploaded: {filename}")
            st.session_state.timer_start = time.time()
            save_username(st.session_state.username)
            st.rerun()

    # 🔍 ArXiv
    elif tab_main == "🔍 ArXiv":
        q = st.text_input("🔍 Query:", key="arxiv_query")
        if q and q != st.session_state.last_query:
            st.session_state.last_query = q
            if st.session_state.autosearch or st.button("🔍 Run"):
                result, papers = await perform_ai_lookup(q, useArxiv, useArxivAudio)
                for i, p in enumerate(papers, 1):
                    with st.expander(f"{i}. 📄 {p['title']}"):
                        st.markdown(f"**{p['date']} | {p['title']}** — [Link]({p['url']})")
                        st.markdown(generate_5min_feature_markdown(p))
                        if p.get('full_audio'):
                            play_and_download_audio(p['full_audio'])

    # 📚 PDF to Audio
    elif tab_main == "📚 PDF to Audio":
        audio_processor = AudioProcessor()
        pdf_file = st.file_uploader("Choose PDF", "pdf")
        max_pages = st.slider('Pages', 1, 100, 10)
        if pdf_file:
            with st.spinner('Processing...'):
                texts, audios, total = process_pdf(pdf_file, max_pages, st.session_state['tts_voice'], audio_processor)
                for i, text in enumerate(texts):
                    with st.expander(f"Page {i+1}"):
                        st.markdown(text)
                        # Wait for the background worker, but give up after a
                        # minute: a failed worker never reports a result and
                        # an unbounded wait would hang the page forever.
                        deadline = time.time() + 60
                        while i not in audios and time.time() < deadline:
                            await asyncio.sleep(0.1)
                        if audios.get(i):
                            st.audio(audios[i])
                            st.markdown(get_download_link(audios[i], "mp3"), unsafe_allow_html=True)
                            await save_chat_entry(st.session_state.username, f"PDF Page {i+1} converted to audio: {audios[i]}")

    # 🗂️ Sidebar with Dialog and Audio
    st.sidebar.subheader("Voice Settings")
    new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
    if new_username != st.session_state.username:
        await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
        st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
        st.session_state.timer_start = time.time()
        save_username(st.session_state.username)
        st.rerun()

    st.sidebar.markdown("### 💬 Chat Dialog & Media")
    chat_content = await load_chat()
    lines = chat_content.split('\n')
    all_files = sorted(glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.png") + glob.glob("*.mp4"), key=os.path.getmtime, reverse=True)
    for line in lines[-10:]:
        if line.strip():
            st.sidebar.markdown(f"**{line}**")
            # Show the first media file whose name contains the current
            # username and any word of this chat line.
            for f in all_files:
                f_name = os.path.basename(f)
                if st.session_state.username in f_name and any(word in f_name for word in line.split()):
                    if f.endswith('.mp3'):
                        st.sidebar.audio(f)
                        st.sidebar.markdown(get_download_link(f, "mp3"), unsafe_allow_html=True)
                    elif f.endswith('.png'):
                        st.sidebar.image(f, use_container_width=True)
                        st.sidebar.markdown(get_download_link(f, "png"), unsafe_allow_html=True)
                    elif f.endswith('.mp4'):
                        st.sidebar.video(f)
                        st.sidebar.markdown(get_download_link(f, "mp4"), unsafe_allow_html=True)
                    break

    st.sidebar.subheader("Vote Totals")
    chat_votes = load_votes(QUOTE_VOTES_FILE)
    image_votes = load_votes(IMAGE_VOTES_FILE)
    for item, count in chat_votes.items():
        st.sidebar.write(f"{item}: {count} votes")
    for image, count in image_votes.items():
        st.sidebar.write(f"{image}: {count} votes")

    md_files = [f for f in all_files if f.endswith('.md')]
    mp3_files = [f for f in all_files if f.endswith('.mp3')]
    png_files = [f for f in all_files if f.endswith('.png')]
    mp4_files = [f for f in all_files if f.endswith('.mp4')]
    st.sidebar.markdown("### 📂 File History")
    for f in all_files[:10]:
        st.sidebar.write(f"{FILE_EMOJIS.get(f.split('.')[-1], '📄')} {os.path.basename(f)}")
    if st.sidebar.button("⬇️ Zip All"):
        zip_name = create_zip_of_files(md_files, mp3_files, png_files, mp4_files, "latest_query")
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
703
+
704
def main():
    """Run the async Streamlit interface to completion on a new event loop."""
    interface = async_interface()
    asyncio.run(interface)


if __name__ == "__main__":
    main()