Update app.py
app.py CHANGED
@@ -1,9 +1,9 @@
-# ๐ Main App - TalkingAIResearcher with Chat, Voice, Media, ArXiv, and More
 import streamlit as st
 import asyncio
 import websockets
 import uuid
 import argparse
+from datetime import datetime
 import os
 import random
 import time
@@ -17,6 +17,7 @@ import edge_tts
 from audio_recorder_streamlit import audio_recorder
 import nest_asyncio
 import re
+from streamlit_paste_button import paste_image_button
 import pytz
 import shutil
 import anthropic
@@ -28,11 +29,10 @@ import zipfile
 from gradio_client import Client
 from dotenv import load_dotenv
 from streamlit_marquee import streamlit_marquee
-from datetime import datetime
 from collections import defaultdict, Counter
 import pandas as pd

-# ๐ ๏ธ Patch asyncio for nesting
+# ๐ ๏ธ Patch asyncio for nesting
 nest_asyncio.apply()

 # ๐จ Page Config
@@ -57,8 +57,18 @@ FUN_USERNAMES = {
 "GalacticGopher ๐": "en-AU-WilliamNeural",
 "RocketRaccoon ๐": "en-CA-LiamNeural",
 "EchoElf ๐ง": "en-US-AnaNeural",
+"PhantomFox ๐ฆ": "en-US-BrandonNeural",
+"WittyWizard ๐ง": "en-GB-ThomasNeural",
+"LunarLlama ๐": "en-AU-FreyaNeural",
+"SolarSloth โ๏ธ": "en-CA-LindaNeural",
+"AstroAlpaca ๐ฆ": "en-US-ChristopherNeural",
+"CyberCoyote ๐บ": "en-GB-ElliotNeural",
+"MysticMoose ๐ฆ": "en-AU-JamesNeural",
+"GlitchGnome ๐ง": "en-CA-EthanNeural",
+"VortexViper ๐": "en-US-AmberNeural",
+"ChronoChimp ๐": "en-GB-LibbyNeural"
 }
-EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
+EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
 FILE_EMOJIS = {"md": "๐", "mp3": "๐ต", "wav": "๐"}

 # ๐ Directories
@@ -69,7 +79,7 @@ CHAT_FILE = "chat_logs/global_chat.md"
 HISTORY_FILE = "history_logs/chat_history.md"
 MEDIA_DIR = "media_files"
 AUDIO_CACHE_DIR = "audio_cache"
-AUDIO_DIR = "audio_logs"
+AUDIO_DIR = "audio_logs"

 # ๐ API Keys
 load_dotenv()
@@ -115,7 +125,6 @@ def init_session_state():

 # ๐๏ธ Marquee Helpers
 def update_marquee_settings_ui():
-# ๐จ Sidebar marquee controls
 st.sidebar.markdown("### ๐ฏ Marquee Settings")
 cols = st.sidebar.columns(2)
 with cols[0]:
@@ -126,7 +135,6 @@ def update_marquee_settings_ui():
 st.session_state['marquee_settings']['animationDuration'] = f"{st.slider('โฑ๏ธ Speed', 1, 20, 20)}s"

 def display_marquee(text, settings, key_suffix=""):
-# ๐ Show marquee with truncation
 truncated = text[:280] + "..." if len(text) > 280 else text
 streamlit_marquee(content=truncated, **settings, key=f"marquee_{key_suffix}")
 st.write("")
@@ -142,7 +150,6 @@ def get_high_info_terms(text, top_n=10):
 return [t for t, _ in Counter(filtered).most_common(top_n)]

 def generate_filename(prompt, response, file_type="md"):
-# ๐ Smart filename with info terms
 prefix = format_timestamp_prefix()
 terms = get_high_info_terms(prompt + " " + response, 5)
 snippet = clean_text_for_filename(prompt[:40] + " " + response[:40])
@@ -152,13 +159,11 @@ def generate_filename(prompt, response, file_type="md"):
 return f"{prefix}{base}_wct{wct}_sw{sw}_dur{dur}.{file_type}"

 def create_file(prompt, response, file_type="md"):
-# ๐ Save file with Q&A
 filename = generate_filename(prompt, response, file_type)
 with open(filename, 'w', encoding='utf-8') as f: f.write(prompt + "\n\n" + response)
 return filename

 def get_download_link(file, file_type="mp3"):
-# โฌ๏ธ Cached download link
 cache_key = f"dl_{file}"
 if cache_key not in st.session_state['download_link_cache']:
 with open(file, "rb") as f:
@@ -168,7 +173,6 @@ def get_download_link(file, file_type="mp3"):

 # ๐ถ Audio Processing
 async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
-# ๐ต Async TTS with caching and .md generation
 cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
 if cache_key in st.session_state['audio_cache']: return st.session_state['audio_cache'][cache_key], 0
 start_time = time.time()
@@ -179,7 +183,6 @@ async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_f
 await communicate.save(filename)
 st.session_state['audio_cache'][cache_key] = filename

-# Generate .md file
 md_filename = filename.replace(".mp3", ".md")
 md_content = f"# Chat Audio Log\n\n**Player:** {username}\n**Voice:** {voice}\n**Text:**\n```markdown\n{text}\n```"
 with open(md_filename, 'w', encoding='utf-8') as f: f.write(md_content)
@@ -187,13 +190,11 @@ async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_f
 return filename, time.time() - start_time

 def play_and_download_audio(file_path):
-# ๐ Play + download
 if file_path and os.path.exists(file_path):
 st.audio(file_path)
 st.markdown(get_download_link(file_path), unsafe_allow_html=True)

 def load_mp3_viewer():
-# ๐ต Load all MP3s at startup
 mp3_files = glob.glob(f"{AUDIO_DIR}/*.mp3")
 for mp3 in mp3_files:
 filename = os.path.basename(mp3)
@@ -201,7 +202,6 @@ def load_mp3_viewer():
 st.session_state['mp3_files'][filename] = mp3

 async def save_chat_entry(username, message, is_markdown=False):
-# ๐ฌ Save chat with multicast broadcast and audio
 central = pytz.timezone('US/Central')
 timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
 entry = f"[{timestamp}] {username}: {message}" if not is_markdown else f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
@@ -217,7 +217,6 @@ async def save_chat_entry(username, message, is_markdown=False):
 return audio_file

 async def load_chat():
-# ๐ Load chat history - Numbered
 if not os.path.exists(CHAT_FILE):
 with open(CHAT_FILE, 'a') as f: f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! ๐ค\n")
 with open(CHAT_FILE, 'r') as f:
@@ -228,7 +227,6 @@ async def load_chat():

 # ๐ WebSocket Handling
 async def websocket_handler(websocket, path):
-# ๐ค Handle WebSocket clients
 client_id = str(uuid.uuid4())
 room_id = "chat"
 if room_id not in st.session_state.active_connections:
@@ -252,7 +250,6 @@ async def websocket_handler(websocket, path):
 del st.session_state.active_connections[room_id][client_id]

 async def broadcast_message(message, room_id):
-# ๐ข Broadcast to all clients
 if room_id in st.session_state.active_connections:
 disconnected = []
 for client_id, ws in st.session_state.active_connections[room_id].items():
@@ -265,7 +262,6 @@ async def broadcast_message(message, room_id):
 del st.session_state.active_connections[room_id][client_id]

 async def run_websocket_server():
-# ๐ฅ๏ธ Start WebSocket server
 if not st.session_state.server_running:
 server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
 st.session_state.server_running = True
@@ -282,7 +278,6 @@ class AudioProcessor:
 with open(f"{self.cache_dir}/metadata.json", 'w') as f: json.dump(self.metadata, f)

 async def create_audio(self, text, voice='en-US-AriaNeural'):
-# ๐ถ Generate cached audio
 cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
 cache_path = f"{self.cache_dir}/{cache_key}.mp3"
 if cache_key in self.metadata and os.path.exists(cache_path):
@@ -296,7 +291,6 @@ class AudioProcessor:
 return open(cache_path, 'rb').read()

 def process_pdf(pdf_file, max_pages, voice, audio_processor):
-# ๐ Convert PDF to audio
 reader = PdfReader(pdf_file)
 total_pages = min(len(reader.pages), max_pages)
 texts, audios = [], {}
@@ -309,7 +303,6 @@ def process_pdf(pdf_file, max_pages, voice, audio_processor):

 # ๐ ArXiv & AI Lookup
 def parse_arxiv_refs(ref_text):
-# ๐ Parse ArXiv refs into dicts
 if not ref_text: return []
 papers = []
 current = {}
@@ -326,7 +319,6 @@ def parse_arxiv_refs(ref_text):
 return papers[:20]

 def generate_5min_feature_markdown(paper):
-# โจ 5-min research paper feature
 title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
 pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
 wct, sw = len(title.split()), len(summary.split())
@@ -348,14 +340,12 @@ def generate_5min_feature_markdown(paper):
 def create_detailed_paper_md(papers): return "# Detailed Summary\n" + "\n".join(generate_5min_feature_markdown(p) for p in papers)

 async def create_paper_audio_files(papers, query):
-# ๐ง Generate paper audio
 for p in papers:
 audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
 p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], p['authors'])
 if p['full_audio']: p['download_base64'] = get_download_link(p['full_audio'])

 async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
-# ๐ฎ AI-powered research
 client = anthropic.Anthropic(api_key=anthropic_key)
 response = client.messages.create(model="claude-3-sonnet-20240229", max_tokens=1000, messages=[{"role": "user", "content": q}])
 result = response.content[0].text
@@ -378,7 +368,6 @@ async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):

 # ๐ฆ Zip Files
 def create_zip_of_files(md_files, mp3_files, query):
-# ๐ฆ Zip it up
 all_files = md_files + mp3_files
 if not all_files: return None
 terms = get_high_info_terms(" ".join([open(f, 'r', encoding='utf-8').read() if f.endswith('.md') else os.path.splitext(os.path.basename(f))[0].replace('_', ' ') for f in all_files] + [query]), 5)
@@ -389,7 +378,7 @@ def create_zip_of_files(md_files, mp3_files, query):
 # ๐ฎ Main Interface
 async def async_interface():
 init_session_state()
-load_mp3_viewer()
+load_mp3_viewer()
 if not st.session_state.username:
 available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in (await load_chat()).split('\n'))]
 st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
@@ -418,7 +407,6 @@ async def async_interface():
 for i, line in enumerate(lines):
 if line.strip():
 st.markdown(line)
-# Match MP3 to line by timestamp and username
 for mp3_name, mp3_path in st.session_state['mp3_files'].items():
 if line.strip() in mp3_name and st.session_state.username in mp3_name:
 st.audio(mp3_path, key=f"audio_{i}_{mp3_name}")
@@ -432,7 +420,7 @@ async def async_interface():
 st.rerun()

 st.subheader("๐ค Speech-to-Chat")
-
+from mycomponent import speech_component
 transcript_data = speech_component(default_value=st.session_state.get('last_transcript', ''))
 if transcript_data and 'value' in transcript_data:
 transcript = transcript_data['value'].strip()
@@ -498,7 +486,7 @@ async def async_interface():
 st.audio(audios[i], format='audio/mp3')
 st.markdown(get_download_link(io.BytesIO(audios[i]), "mp3"), unsafe_allow_html=True)

-# ๐๏ธ Sidebar
+# ๐๏ธ Sidebar with Dialog and Audio
 st.sidebar.subheader("Voice Settings")
 new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
 if new_username != st.session_state.username:
@@ -506,6 +494,20 @@ async def async_interface():
 st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
 st.rerun()

+st.sidebar.markdown("### ๐ฌ Chat Dialog & Audio")
+chat_content = await load_chat()
+lines = chat_content.split('\n')
+audio_files = sorted(glob.glob(f"{AUDIO_DIR}/*.mp3"), key=os.path.getmtime, reverse=True)
+for line in lines[-10:]: # Show last 10 lines for brevity
+if line.strip():
+st.sidebar.markdown(f"**{line}**")
+for mp3 in audio_files:
+mp3_name = os.path.basename(mp3)
+if st.session_state.username in mp3_name and any(word in mp3_name for word in line.split()):
+st.sidebar.audio(mp3, key=f"sidebar_audio_{mp3_name}")
+st.sidebar.markdown(get_download_link(mp3), unsafe_allow_html=True)
+break
+
 md_files, mp3_files = glob.glob("*.md"), glob.glob(f"{AUDIO_DIR}/*.mp3")
 st.sidebar.markdown("### ๐ File History")
 for f in sorted(md_files + mp3_files, key=os.path.getmtime, reverse=True)[:10]:
@@ -515,7 +517,6 @@ async def async_interface():
 if zip_name: st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)

 def main():
-# ๐ Kick it off
 asyncio.run(async_interface())

 if __name__ == "__main__":
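
This commit adds ten username-to-voice pairs to FUN_USERNAMES and rebuilds EDGE_TTS_VOICES from them. A minimal standalone sketch (not part of the commit) of exercising one of the newly added voices with edge-tts, mirroring the `communicate.save()` call the app already uses in `async_edge_tts_generate`; the sample text and output filename are illustrative:

```python
# Minimal sketch, not from the commit: synthesize speech with one of the
# newly added Edge TTS voices ("PhantomFox" maps to en-US-BrandonNeural
# in the updated FUN_USERNAMES dict). Filename and text are illustrative.
import asyncio
import edge_tts

async def demo():
    communicate = edge_tts.Communicate("Hello from the cosmic hub!", "en-US-BrandonNeural")
    await communicate.save("phantomfox_demo.mp3")  # same save() call used by the app

asyncio.run(demo())
```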
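
The new sidebar block pairs each of the last ten chat lines with a cached MP3 by checking that the filename contains the current username and shares at least one word with the line, taking the most recent match. A small sketch of that heuristic in isolation; the helper name and signature are illustrative, not part of the commit:

```python
# Sketch of the sidebar matching heuristic added in this commit: an MP3 is
# linked to a chat line when its filename contains the username and at least
# one word from the line. Helper name and signature are illustrative only.
import os
from typing import Optional

def find_matching_mp3(line: str, username: str, mp3_paths: list) -> Optional[str]:
    for path in mp3_paths:  # assumed pre-sorted newest-first, as in the sidebar code
        name = os.path.basename(path)
        if username in name and any(word in name for word in line.split()):
            return path  # first match wins, like the sidebar's break
    return None
```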