import streamlit as st
import asyncio
import websockets
import uuid
import argparse
from datetime import datetime
import os
import random
import time
import hashlib
from PIL import Image
import glob
import base64
import io
import streamlit.components.v1 as components
import edge_tts
from audio_recorder_streamlit import audio_recorder
import nest_asyncio
import re
from streamlit_paste_button import paste_image_button
import pytz
import shutil
import anthropic
import openai
from PyPDF2 import PdfReader
import threading
import json
import zipfile
from gradio_client import Client
from dotenv import load_dotenv
from streamlit_marquee import streamlit_marquee

nest_asyncio.apply()
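# nest_asyncio re-enters the already-running event loop, so the asyncio.run()
# and loop.run_until_complete() calls below work inside Streamlit's script
# runner instead of raising "event loop is already running".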

icons = '🤖🧠💬📚'
START_ROOM = "Sector 🌌"

st.set_page_config(
    page_title="🤖🧠 MMO Chat & Research Brain📚💬",
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto"
)

FUN_USERNAMES = {
    "CosmicJester 🎭": "en-US-AriaNeural",
    "PixelPanda 🐼": "en-US-JennyNeural",
    "QuantumQuack 🦆": "en-GB-SoniaNeural",
    "StellarSquirrel 🐿️": "en-AU-NatashaNeural",
    "GizmoGuru ⚙️": "en-CA-ClaraNeural",
    "NebulaNinja 🌠": "en-US-GuyNeural",
    "ByteBuster 💾": "en-GB-RyanNeural",
    "GalacticGopher 🌍": "en-AU-WilliamNeural",
    "RocketRaccoon 🚀": "en-CA-LiamNeural",
    "EchoElf 🧝": "en-US-AnaNeural",
    "PhantomFox 🦊": "en-US-BrandonNeural",
    "WittyWizard 🧙": "en-GB-ThomasNeural",
    "LunarLlama 🌙": "en-AU-FreyaNeural",
    "SolarSloth ☀️": "en-CA-LindaNeural",
    "AstroAlpaca 🦙": "en-US-ChristopherNeural",
    "CyberCoyote 🐺": "en-GB-ElliotNeural",
    "MysticMoose 🦌": "en-AU-JamesNeural",
    "GlitchGnome 🧞": "en-CA-EthanNeural",
    "VortexViper 🐍": "en-US-AmberNeural",
    "ChronoChimp 🐒": "en-GB-LibbyNeural"
}

CHAT_DIR = "chat_logs"
VOTE_DIR = "vote_logs"
AUDIO_DIR = "audio_logs"
HISTORY_DIR = "history_logs"
MEDIA_DIR = "media_files"
os.makedirs(CHAT_DIR, exist_ok=True)
os.makedirs(VOTE_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(HISTORY_DIR, exist_ok=True)
os.makedirs(MEDIA_DIR, exist_ok=True)

CHAT_FILE = os.path.join(CHAT_DIR, "global_chat.md")
QUOTE_VOTES_FILE = os.path.join(VOTE_DIR, "quote_votes.md")
MEDIA_VOTES_FILE = os.path.join(VOTE_DIR, "media_votes.md")
HISTORY_FILE = os.path.join(HISTORY_DIR, "chat_history.md")

UNICODE_DIGITS = {i: f"{i}\uFE0F\u20E3" for i in range(10)}

UNICODE_FONTS = [
    ("Normal", lambda x: x),
    ("Bold", lambda x: "".join(
        chr(ord(c) + 0x1D400 - 0x41) if 'A' <= c <= 'Z'
        else chr(ord(c) + 0x1D41A - 0x61) if 'a' <= c <= 'z'
        else c
        for c in x)),
]
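# Usage sketch for the helpers above (illustrative only):
#   bold = dict(UNICODE_FONTS)["Bold"]
#   bold("Chat")        # -> mathematical-bold "𝐂𝐡𝐚𝐭"
#   UNICODE_DIGITS[3]   # -> keycap "3️⃣" (digit + U+FE0F + U+20E3)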

if 'server_running' not in st.session_state:
    st.session_state.server_running = False
if 'server_task' not in st.session_state:
    st.session_state.server_task = None
if 'active_connections' not in st.session_state:
    st.session_state.active_connections = {}
if 'media_notifications' not in st.session_state:
    st.session_state.media_notifications = []
if 'last_chat_update' not in st.session_state:
    st.session_state.last_chat_update = 0
if 'displayed_chat_lines' not in st.session_state:
    st.session_state.displayed_chat_lines = []
if 'message_text' not in st.session_state:
    st.session_state.message_text = ""
if 'audio_cache' not in st.session_state:
    st.session_state.audio_cache = {}
if 'pasted_image_data' not in st.session_state:
    st.session_state.pasted_image_data = None
if 'quote_line' not in st.session_state:
    st.session_state.quote_line = None
if 'refresh_rate' not in st.session_state:
    st.session_state.refresh_rate = 5
if 'base64_cache' not in st.session_state:
    st.session_state.base64_cache = {}
if 'transcript_history' not in st.session_state:
    st.session_state.transcript_history = []
if 'last_transcript' not in st.session_state:
    st.session_state.last_transcript = ""
if 'image_hashes' not in st.session_state:
    st.session_state.image_hashes = set()
if 'tts_voice' not in st.session_state:
    st.session_state.tts_voice = "en-US-AriaNeural"
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

load_dotenv()
anthropic_key = os.getenv('ANTHROPIC_API_KEY', "")
openai_api_key = os.getenv('OPENAI_API_KEY', "")
try:
    # Accessing st.secrets raises when no secrets.toml exists, so guard the
    # lookups and fall back to the environment values above.
    if 'ANTHROPIC_API_KEY' in st.secrets:
        anthropic_key = st.secrets['ANTHROPIC_API_KEY']
    if 'OPENAI_API_KEY' in st.secrets:
        openai_api_key = st.secrets['OPENAI_API_KEY']
except Exception:
    pass
openai_client = openai.OpenAI(api_key=openai_api_key)

def format_timestamp_prefix(username):
    central = pytz.timezone('US/Central')
    now = datetime.now(central)
    return f"{now.strftime('%I-%M-%p-ct-%m-%d-%Y')}-by-{username}"


def compute_image_hash(image_data):
    if isinstance(image_data, Image.Image):
        img_byte_arr = io.BytesIO()
        image_data.save(img_byte_arr, format='PNG')
        img_bytes = img_byte_arr.getvalue()
    else:
        img_bytes = image_data
    return hashlib.md5(img_bytes).hexdigest()[:8]
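# The truncated MD5 above is a cheap dedupe key (paired with
# st.session_state.image_hashes), not a security measure; PIL images are
# re-encoded to PNG before hashing so identical pixels hash identically.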

def get_node_name():
    parser = argparse.ArgumentParser(description='Start a chat node')
    parser.add_argument('--node-name', type=str, default=None)
    parser.add_argument('--port', type=int, default=8501)
    # parse_known_args tolerates the extra flags Streamlit itself passes on
    # the command line; parse_args() would exit on them.
    args, _ = parser.parse_known_args()
    return args.node_name or f"node-{uuid.uuid4().hex[:8]}", args.port

def log_action(username, action):
    if 'action_log' not in st.session_state:
        st.session_state.action_log = {}
    user_log = st.session_state.action_log.setdefault(username, {})
    current_time = time.time()
    # Drop entries older than 10 seconds, so each action is written to the
    # history file at most once per 10-second window per user.
    user_log = {k: v for k, v in user_log.items() if current_time - v < 10}
    st.session_state.action_log[username] = user_log
    if action not in user_log:
        central = pytz.timezone('US/Central')
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] {username}: {action}\n")
        user_log[action] = current_time

def clean_text_for_tts(text):
    cleaned = re.sub(r'[#*!\[\]]+', '', text)
    cleaned = ' '.join(cleaned.split())
    return cleaned[:200] if cleaned else "No text to speak"
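# Example: clean_text_for_tts("## *Hello* [world](x)!") -> "Hello world(x)";
# markdown markers are stripped and the result is capped at 200 characters.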

async def save_chat_entry(username, message, is_markdown=False):
    await asyncio.to_thread(log_action, username, "💬📝 - Chat saver")
    central = pytz.timezone('US/Central')
    timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
    if is_markdown:
        entry = f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
    else:
        entry = f"[{timestamp}] {username}: {message}"
    await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"{entry}\n"))
    voice = FUN_USERNAMES.get(username, "en-US-AriaNeural")
    cleaned_message = clean_text_for_tts(message)
    audio_file = await async_edge_tts_generate(cleaned_message, voice)
    if audio_file:
        with open(HISTORY_FILE, 'a') as f:
            f.write(f"[{timestamp}] {username}: Audio generated - {audio_file}\n")
    await broadcast_message(f"{username}|{message}", "chat")
    st.session_state.last_chat_update = time.time()
    return audio_file

async def load_chat():
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "📜📖 - Chat loader")
    if not os.path.exists(CHAT_FILE):
        await asyncio.to_thread(lambda: open(CHAT_FILE, 'a').write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! 🤖\n"))
    with open(CHAT_FILE, 'r') as f:
        content = await asyncio.to_thread(f.read)
    return content

async def async_edge_tts_generate(text, voice, rate=0, pitch=0, file_format="mp3"):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🎶🔊 - Audio maker")
    timestamp = format_timestamp_prefix(st.session_state.get('username', 'System 🌟'))
    filename = f"{timestamp}.{file_format}"
    filepath = os.path.join(AUDIO_DIR, filename)
    communicate = edge_tts.Communicate(text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
    try:
        await communicate.save(filepath)
        return filepath if os.path.exists(filepath) else None
    except edge_tts.exceptions.NoAudioReceived:
        with open(HISTORY_FILE, 'a') as f:
            central = pytz.timezone('US/Central')
            f.write(f"[{datetime.now(central).strftime('%Y-%m-%d %H:%M:%S')}] Audio failed for '{text}'\n")
        return None
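# NOTE: the filename prefix above has minute (not second) resolution, so two
# clips generated for the same user within one minute share a path and the
# later save silently overwrites the earlier file.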

def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        if file_path not in st.session_state.base64_cache:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            st.session_state.base64_cache[file_path] = b64
        b64 = st.session_state.base64_cache[file_path]
        dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file_path)}">🎵 Download {os.path.basename(file_path)}</a>'
        st.markdown(dl_link, unsafe_allow_html=True)

async def websocket_handler(websocket, path):
    username = st.session_state.get('username', 'System 🌟')
    await asyncio.to_thread(log_action, username, "🌐🔗 - Websocket handler")
    # Bind these before the try block so the cleanup in `finally` cannot hit
    # a NameError if anything fails before registration completes.
    client_id = str(uuid.uuid4())
    room_id = "chat"
    st.session_state.active_connections.setdefault(room_id, {})[client_id] = websocket
    try:
        chat_content = await load_chat()
        username = st.session_state.get('username', random.choice(list(FUN_USERNAMES.keys())))
        if not any(f"Client-{client_id}" in line for line in chat_content.split('\n')):
            await save_chat_entry(f"Client-{client_id}", f"{username} has joined {START_ROOM}!")
        async for message in websocket:
            parts = message.split('|', 1)
            if len(parts) == 2:
                username, content = parts
                await save_chat_entry(username, content)
    except websockets.ConnectionClosed:
        pass
    finally:
        if room_id in st.session_state.active_connections and client_id in st.session_state.active_connections[room_id]:
            del st.session_state.active_connections[room_id][client_id]
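# NOTE: newer releases of the websockets package call handlers with a single
# connection argument; if websockets.serve() complains about the handler
# signature, drop the `path` parameter from websocket_handler above.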

async def broadcast_message(message, room_id):
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "📢✉️ - Message broadcaster")
    if room_id in st.session_state.active_connections:
        disconnected = []
        for client_id, ws in st.session_state.active_connections[room_id].items():
            try:
                await ws.send(message)
            except websockets.ConnectionClosed:
                disconnected.append(client_id)
        for client_id in disconnected:
            del st.session_state.active_connections[room_id][client_id]


async def run_websocket_server():
    await asyncio.to_thread(log_action, st.session_state.get('username', 'System 🌟'), "🖥️🚀 - Server starter")
    if not st.session_state.server_running:
        server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
        st.session_state.server_running = True
        await server.wait_closed()

class AudioProcessor:
    def __init__(self):
        self.cache_dir = "audio_cache"
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    def _load_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        return json.load(open(metadata_file)) if os.path.exists(metadata_file) else {}

    def _save_metadata(self):
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        with open(metadata_file, 'w') as f:
            json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")
        if cache_key in self.metadata and os.path.exists(cache_path):
            return open(cache_path, 'rb').read()
        text = text.replace("\n", " ").replace("</s>", " ").strip()
        if not text:
            return None
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)
        self.metadata[cache_key] = {
            'timestamp': datetime.now().isoformat(),
            'text_length': len(text),
            'voice': voice
        }
        self._save_metadata()
        return open(cache_path, 'rb').read()
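# Minimal usage sketch for AudioProcessor (illustrative only; asyncio.run()
# works here because nest_asyncio is applied at import time):
#   processor = AudioProcessor()
#   mp3_bytes = asyncio.run(processor.create_audio("Hello", "en-US-AriaNeural"))
#   st.audio(mp3_bytes, format='audio/mp3')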

def get_download_link(bin_data, filename, size_mb=None):
    b64 = base64.b64encode(bin_data).decode()
    size_str = f"({size_mb:.1f} MB)" if size_mb else ""
    return f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">📥 {filename} {size_str}</a>'

def process_pdf(pdf_file, max_pages, voice, audio_processor):
    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}

    async def process_page(i, text):
        try:
            audio_data = await audio_processor.create_audio(text, voice)
            audios[i] = audio_data
        except Exception:
            # Record a result even on failure so callers polling `audios`
            # never wait forever for this page.
            audios[i] = None

    for i in range(total_pages):
        text = reader.pages[i].extract_text()
        texts.append(text)
        # Bind i/text as lambda defaults: a bare closure would see only the
        # final loop values by the time each thread actually runs.
        threading.Thread(target=lambda i=i, text=text: asyncio.run(process_page(i, text))).start()
    return texts, audios, total_pages
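# process_pdf fans each page out to its own thread, each running its
# coroutine on a private loop via asyncio.run(); results land in the shared
# `audios` dict, which the caller polls. Because the worker always stores a
# value (None on failure), the polling loop in main() cannot spin forever.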

def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    client = anthropic.Anthropic(api_key=anthropic_key)
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}]
    )
    result = response.content[0].text
    st.markdown("### Claude's reply 🧠:")
    st.markdown(result)
    md_file = create_file(q, result)
    audio_file = speak_with_edge_tts(result, st.session_state.tts_voice)
    play_and_download_audio(audio_file)
    if useArxiv:
        q += result
        gradio_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = gradio_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
        result = f"🔎 {q}\n\n{refs}"
        md_file, audio_file = save_qa_with_audio(q, result)
        play_and_download_audio(audio_file)
        papers = parse_arxiv_refs(refs)
        if papers and useArxivAudio:
            asyncio.run(create_paper_audio_files(papers, q))
        return result, papers
    return result, []

def create_file(prompt, response, file_type="md"):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}_{clean_text_for_filename(prompt[:40] + ' ' + response[:40])}.{file_type}"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename


def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
    return result


def save_qa_with_audio(question, answer, voice=None):
    voice = voice or st.session_state.tts_voice
    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(f"{question}\n\nAnswer: {answer}", voice)
    return md_file, audio_file


def clean_text_for_filename(text):
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    return '_'.join(text.split())[:200]
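# Example: clean_text_for_filename("What is RAG? A quick survey!")
#   -> "what_is_rag_a_quick_survey"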

def parse_arxiv_refs(ref_text):
    # Minimal parser: each non-empty line of the RAG output becomes a paper
    # stub; only the title field is populated here.
    return [{"title": line.strip(), "url": "", "authors": "", "summary": "", "full_audio": None, "download_base64": ""} for line in ref_text.split('\n') if line.strip()]

async def create_paper_audio_files(papers, input_question):
    for paper in papers:
        audio_text = f"{paper['title']}"
        audio_file = await async_edge_tts_generate(audio_text, st.session_state.tts_voice)
        paper['full_audio'] = audio_file
        if audio_file:
            with open(audio_file, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            paper['download_base64'] = f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(audio_file)}">🎵 Download</a>'

ASR_HTML = """
<html>
<head>
    <title>Continuous Speech Demo</title>
    <style>
        body { font-family: sans-serif; padding: 20px; max-width: 800px; margin: 0 auto; }
        button { padding: 10px 20px; margin: 10px 5px; font-size: 16px; }
        #status { margin: 10px 0; padding: 10px; background: #e8f5e9; border-radius: 4px; }
        #output { white-space: pre-wrap; padding: 15px; background: #f5f5f5; border-radius: 4px; margin: 10px 0; min-height: 100px; max-height: 400px; overflow-y: auto; }
    </style>
</head>
<body>
    <div>
        <button id="start">Start Listening</button>
        <button id="stop" disabled>Stop Listening</button>
        <button id="clear">Clear Text</button>
    </div>
    <div id="status">Ready</div>
    <div id="output"></div>
    <script>
        if (!('webkitSpeechRecognition' in window)) {
            alert('Speech recognition not supported');
        } else {
            const recognition = new webkitSpeechRecognition();
            const startButton = document.getElementById('start');
            const stopButton = document.getElementById('stop');
            const clearButton = document.getElementById('clear');
            const status = document.getElementById('status');
            const output = document.getElementById('output');
            let fullTranscript = '';
            let lastUpdateTime = Date.now();

            recognition.continuous = true;
            recognition.interimResults = true;

            const startRecognition = () => {
                try {
                    recognition.start();
                    status.textContent = 'Listening...';
                    startButton.disabled = true;
                    stopButton.disabled = false;
                } catch (e) {
                    console.error(e);
                    status.textContent = 'Error: ' + e.message;
                }
            };

            // Auto-start shortly after load so the widget listens immediately.
            window.addEventListener('load', () => setTimeout(startRecognition, 1000));

            startButton.onclick = startRecognition;

            stopButton.onclick = () => {
                recognition.stop();
                status.textContent = 'Stopped';
                startButton.disabled = false;
                stopButton.disabled = true;
            };

            clearButton.onclick = () => {
                fullTranscript = '';
                output.textContent = '';
                sendDataToPython({value: '', dataType: "json"});
            };

            recognition.onresult = (event) => {
                let interimTranscript = '';
                let finalTranscript = '';

                for (let i = event.resultIndex; i < event.results.length; i++) {
                    const transcript = event.results[i][0].transcript;
                    if (event.results[i].isFinal) {
                        finalTranscript += transcript + '\\n';
                    } else {
                        interimTranscript += transcript;
                    }
                }

                // Push an update on every final result, or at most every 5s
                // while interim results accumulate.
                if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
                    if (finalTranscript) fullTranscript += finalTranscript;
                    lastUpdateTime = Date.now();
                    output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
                    output.scrollTop = output.scrollHeight;
                    sendDataToPython({value: fullTranscript, dataType: "json"});
                }
            };

            recognition.onend = () => {
                // The browser stops recognition periodically; restart unless
                // the user pressed Stop.
                if (!stopButton.disabled) {
                    try {
                        recognition.start();
                        console.log('Restarted recognition');
                    } catch (e) {
                        console.error('Failed to restart:', e);
                        status.textContent = 'Error restarting: ' + e.message;
                        startButton.disabled = false;
                        stopButton.disabled = true;
                    }
                }
            };

            recognition.onerror = (event) => {
                console.error('Recognition error:', event.error);
                status.textContent = 'Error: ' + event.error;
                if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
                    startButton.disabled = false;
                    stopButton.disabled = true;
                }
            };
        }

        function sendDataToPython(data) {
            window.parent.postMessage({
                isStreamlitMessage: true,
                type: "streamlit:setComponentValue",
                ...data
            }, "*");
        }

        window.addEventListener('load', () => {
            window.setTimeout(() => {
                window.parent.postMessage({
                    isStreamlitMessage: true,
                    type: "streamlit:setFrameHeight",
                    height: document.documentElement.clientHeight
                }, "*");
            }, 0);
        });
    </script>
</body>
</html>
"""

def main():
    NODE_NAME, port = get_node_name()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
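    # One dedicated loop drives both the UI coroutine below and the background
    # websocket server task; nest_asyncio (applied at import time) lets it
    # coexist with Streamlit's own event loop.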

    async def async_interface():
        if 'username' not in st.session_state:
            chat_content = await load_chat()
            available_names = [name for name in FUN_USERNAMES if not any(f"{name} has joined" in line for line in chat_content.split('\n'))]
            st.session_state.username = random.choice(available_names) if available_names else random.choice(list(FUN_USERNAMES.keys()))
            st.session_state.tts_voice = FUN_USERNAMES[st.session_state.username]
            st.markdown(f"**🎙️ Voice**: {st.session_state.tts_voice} 🗣️ for {st.session_state.username}")

        st.title(f"🤖🧠 MMO Chat & Research for {st.session_state.username}📚💬")
        st.markdown(f"Welcome to {START_ROOM} - chat, research, upload, and more! 🚀")

        if not st.session_state.server_task:
            st.session_state.server_task = loop.create_task(run_websocket_server())

        tab_main = st.radio("Action:", ["🎤 Chat & Voice", "📸 Media", "📚 ArXiv", "📄 PDF to Audio"], horizontal=True)
        useArxiv = st.checkbox("Search Arxiv", value=True)
        useArxivAudio = st.checkbox("Generate Arxiv Audio", value=False)

        if tab_main == "🎤 Chat & Voice":
            st.subheader(f"{START_ROOM} Chat 💬")
            chat_content = await load_chat()
            chat_lines = chat_content.split('\n')
            for i, line in enumerate(chat_lines):
                if line.strip() and ': ' in line:
                    st.markdown(line)
                    if st.button("📢 Speak", key=f"speak_{i}"):
                        audio_file = await async_edge_tts_generate(clean_text_for_tts(line.split(': ', 1)[1]), st.session_state.tts_voice)
                        play_and_download_audio(audio_file)

            message = st.text_input(f"Message as {st.session_state.username}", key="message_input")
            if st.button("Send 🚀") and message.strip():
                await save_chat_entry(st.session_state.username, message, is_markdown=True)
                st.rerun()

            st.subheader("🎤 Continuous Speech Input")
            asr_component = components.html(ASR_HTML, height=400)
            if asr_component and isinstance(asr_component, dict) and 'value' in asr_component:
                transcript = asr_component['value'].strip()
                if transcript and transcript != st.session_state.last_transcript:
                    await save_chat_entry(st.session_state.username, transcript, is_markdown=True)
                    st.session_state.last_transcript = transcript
                    st.rerun()

        elif tab_main == "📸 Media":
            st.header("📸 Media Gallery")
            tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])
            with tabs[0]:
                st.subheader("🎵 Audio Files")
                audio_files = glob.glob(f"{MEDIA_DIR}/*.mp3")
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        play_and_download_audio(a)
            with tabs[1]:
                st.subheader("🖼 Images")
                imgs = glob.glob(f"{MEDIA_DIR}/*.png") + glob.glob(f"{MEDIA_DIR}/*.jpg")
                if imgs:
                    cols = st.columns(3)
                    for i, f in enumerate(imgs):
                        with cols[i % 3]:
                            st.image(f, use_container_width=True)
            with tabs[2]:
                st.subheader("🎥 Videos")
                vids = glob.glob(f"{MEDIA_DIR}/*.mp4")
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)

            uploaded_file = st.file_uploader("Upload Media", type=['png', 'jpg', 'mp4', 'mp3'])
            if uploaded_file:
                timestamp = format_timestamp_prefix(st.session_state.username)
                ext = uploaded_file.name.split('.')[-1]
                file_hash = hashlib.md5(uploaded_file.getbuffer()).hexdigest()[:8]
                filename = f"{timestamp}-{file_hash}.{ext}"
                file_path = os.path.join(MEDIA_DIR, filename)
                with open(file_path, 'wb') as f:
                    f.write(uploaded_file.getbuffer())
                await save_chat_entry(st.session_state.username, f"Uploaded media: {file_path}")
                st.rerun()

        elif tab_main == "📚 ArXiv":
            st.subheader("📚 Query ArXiv")
            q = st.text_input("🔍 Query:")
            if q and st.button("🔍 Run"):
                result, papers = perform_ai_lookup(q, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
                for paper in papers:
                    with st.expander(paper['title']):
                        st.markdown(f"**Summary**: {paper['summary']}")
                        if paper['full_audio']:
                            play_and_download_audio(paper['full_audio'])

        elif tab_main == "📄 PDF to Audio":
            st.subheader("📄 PDF to Audio Converter")
            audio_processor = AudioProcessor()
            uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
            max_pages = st.slider('Pages to process', 1, 100, 10)
            if uploaded_file:
                with st.spinner('Processing PDF...'):
                    texts, audios, total_pages = process_pdf(uploaded_file, max_pages, st.session_state.tts_voice, audio_processor)
                    for i, text in enumerate(texts):
                        with st.expander(f"Page {i+1}"):
                            st.markdown(text)
                            while i not in audios:
                                time.sleep(0.1)
                            if audios[i]:
                                st.audio(audios[i], format='audio/mp3')
                                st.markdown(get_download_link(audios[i], f'page_{i+1}.mp3', len(audios[i]) / (1024 * 1024)), unsafe_allow_html=True)

        st.sidebar.subheader("Voice Settings")
        new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
        if new_username != st.session_state.username:
            await save_chat_entry("System 🌟", f"{st.session_state.username} changed to {new_username}")
            st.session_state.username = new_username
            st.session_state.tts_voice = FUN_USERNAMES[new_username]
            st.rerun()

    loop.run_until_complete(async_interface())


if __name__ == "__main__":
    main()