Spaces:

awacke1
/

TorchTransformers-CV-SFT

Sleeping

App Files Files Community

awacke1 committed on Mar 27

Commit

cf22379

verified ·

1 Parent(s): 23a1c52

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -15

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ import glob
 import io
 import json
 import logging
 import os
 import pandas as pd
 import pytz
@@ -15,24 +17,32 @@ import re
 import requests
 import shutil
 import streamlit as st
 import sys
 import time
 import torch
 import zipfile
 from audio_recorder_streamlit import audio_recorder
 from contextlib import redirect_stdout
 from dataclasses import dataclass
 from datetime import datetime
 from diffusers import StableDiffusionPipeline
 from io import BytesIO
-from moviepy import VideoFileClip  # Updated import
 from openai import OpenAI
 from PIL import Image
 from PyPDF2 import PdfReader
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from typing import Optional
 # Initialize OpenAI client
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
@@ -60,13 +70,18 @@ st.set_page_config(
 )
 # Session state initialization
-for key in ['history', 'builder', 'model_loaded', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'messages']:
-    st.session_state.setdefault(key, [] if key in ['history', 'messages'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else None if key == 'builder' else 0 if key == 'unique_counter' else False)
 st.session_state.setdefault('selected_model_type', "Causal LM")
 st.session_state.setdefault('selected_model', "None")
 st.session_state.setdefault('gallery_size', 2)
 st.session_state.setdefault('asset_gallery_container', st.sidebar.empty())
 @dataclass
 class ModelConfig:
     name: str
@@ -93,6 +108,11 @@ class ModelBuilder:
         self.config = None
         self.model = None
         self.tokenizer = None
     def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
         with st.spinner(f"Loading {model_path}... ⏳"):
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
@@ -102,7 +122,7 @@ class ModelBuilder:
             if config:
                 self.config = config
             self.model.to("cuda" if torch.cuda.is_available() else "cpu")
-        st.success(f"Model loaded! 🎉")
         return self
     def save_model(self, path: str):
         with st.spinner("Saving model... 💾"):
@@ -130,10 +150,11 @@ class DiffusionBuilder:
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
 def generate_filename(prompt, ext="png"):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
-    safe_prompt = re.sub(r'[<>:"/\\|?*]', '_', prompt)[:240]
     return f"{safe_date_time}_{safe_prompt}.{ext}"
 def get_download_link(file_path, mime_type="application/pdf", label="Download"):
@@ -162,6 +183,7 @@ def download_pdf(url, output_path):
         logger.error(f"Failed to download {url}: {e}")
         return False
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
@@ -248,18 +270,24 @@ def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
-def process_audio(audio_input, prompt):
     with open(audio_input, "rb") as file:
         transcription = client.audio.transcriptions.create(model="whisper-1", file=file)
-    response = client.chat.completions.create(model="gpt-4o-mini", messages=[{"role": "user", "content": f"{prompt}\n\n{transcription.text}"}])
-    return transcription.text, response.choices[0].message.content
 def process_video(video_path, prompt):
     base64Frames, audio_path = process_video_frames(video_path)
     with open(video_path, "rb") as file:
         transcription = client.audio.transcriptions.create(model="whisper-1", file=file)
     messages = [{"role": "user", "content": ["These are the frames from the video.", *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames), {"type": "text", "text": f"The audio transcription is: {transcription.text}\n\n{prompt}"}]}]
-    response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
     return response.choices[0].message.content
 def process_video_frames(video_path, seconds_per_frame=2):
@@ -300,13 +328,147 @@ def execute_code(code):
     finally:
         buffer.close()
 # Sidebar
 st.sidebar.subheader("Gallery Settings")
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
 # Tabs
-tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚"])
-(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery) = tabs
 with tab_camera:
     st.header("Camera Snap 📷")
@@ -324,8 +486,11 @@ with tab_camera:
 with tab_download:
     st.header("Download PDFs 📥")
-    url_input = st.text_area("Enter PDF URLs (one per line)", height=200)
-    if st.button("Download 🤖"):
         urls = url_input.strip().split("\n")
         progress_bar = st.progress(0)
         for idx, url in enumerate(urls):
@@ -464,6 +629,17 @@ with tab_gallery:
         elif file.endswith('.mp4'):
             st.video(file)
 # Update gallery in sidebar
 def update_gallery():
     container = st.session_state['asset_gallery_container']
@@ -497,4 +673,18 @@ for record in log_records:
 st.sidebar.subheader("History 📜")
 for entry in st.session_state.get("history", []):
     if entry:
-        st.sidebar.write(entry)

 import io
 import json
 import logging
+import math
+import mistune
 import os
 import pandas as pd
 import pytz
 import requests
 import shutil
 import streamlit as st
+import streamlit.components.v1 as components
 import sys
+import textract
 import time
+import tiktoken
 import torch
 import zipfile
 from audio_recorder_streamlit import audio_recorder
+from bs4 import BeautifulSoup
+from collections import deque
 from contextlib import redirect_stdout
 from dataclasses import dataclass
 from datetime import datetime
 from diffusers import StableDiffusionPipeline
+from gradio_client import Client, handle_file
+from huggingface_hub import InferenceClient
 from io import BytesIO
+from moviepy import VideoFileClip
 from openai import OpenAI
 from PIL import Image
 from PyPDF2 import PdfReader
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from typing import Optional
+from urllib.parse import quote
+from xml.etree import ElementTree as ET
 # Initialize OpenAI client
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 )
 # Session state initialization
# Seed every session key the app reads. setdefault() only fills keys that are
# missing, so values already present in st.session_state are left untouched.
for _key in ('history', 'messages', 'search_queries'):
    st.session_state.setdefault(_key, [])
for _key in ('processing', 'asset_checkboxes', 'downloaded_pdfs'):
    st.session_state.setdefault(_key, {})
st.session_state.setdefault('unique_counter', 0)
for _key, _default in (
    ('builder', None),
    ('model_loaded', False),
    ('selected_model_type', "Causal LM"),
    ('selected_model', "None"),
    ('gallery_size', 2),
):
    st.session_state.setdefault(_key, _default)
# st.sidebar.empty() is evaluated on every run, whether or not the key exists.
st.session_state.setdefault('asset_gallery_container', st.sidebar.empty())
for _key in ('cam0_file', 'cam1_file'):
    st.session_state.setdefault(_key, None)
+# Model configurations
 @dataclass
 class ModelConfig:
     name: str
         self.config = None
         self.model = None
         self.tokenizer = None
+        self.jokes = [
+            "Why did the AI go to therapy? Too many layers to unpack! 😂",
+            "Training complete! Time for a binary coffee break. ☕",
+            "I told my neural network a joke; it couldn't stop dropping bits! 🤖"
+        ]
     def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
         with st.spinner(f"Loading {model_path}... ⏳"):
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
             if config:
                 self.config = config
             self.model.to("cuda" if torch.cuda.is_available() else "cpu")
+        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
         return self
     def save_model(self, path: str):
         with st.spinner("Saving model... 💾"):
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
+# Utility functions
 def generate_filename(prompt, ext="png"):
     """Build a timestamped, filesystem-safe file name from a prompt.

     Args:
         prompt: Free text used as the descriptive part of the name.
         ext: File extension without the leading dot.

     Returns:
         A string of the form ``MMDD_HHMM_<sanitised prompt>.<ext>``, where the
         timestamp is the current US/Central time and the prompt part is
         capped at 240 characters.
     """
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
     # Replace the Windows-reserved punctuation plus every ASCII control
     # character (newline, carriage return, tab, NUL, ...), not just "\n":
     # transcripts and pasted text routinely carry "\r\n" or tabs.
     safe_prompt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', prompt)[:240]
     return f"{safe_date_time}_{safe_prompt}.{ext}"
 def get_download_link(file_path, mime_type="application/pdf", label="Download"):
         logger.error(f"Failed to download {url}: {e}")
         return False
+# Processing functions
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
def process_audio(audio_input, text_input=''):
    """Transcribe an audio file, ask the chat model about it, and save the reply.

    The file at ``audio_input`` is sent to the ``whisper-1`` transcription
    model. The transcript, prefixed by ``text_input`` and a blank line, is
    sent to ``gpt-4o-2024-05-13``. The transcript and the reply are appended
    to ``st.session_state.messages`` as user and assistant turns, and
    ``text_input`` plus the reply are written to a Markdown file named by
    ``generate_filename``.

    Args:
        audio_input: Path of the audio file to transcribe.
        text_input: Optional instruction placed before the transcript.

    Returns:
        Tuple ``(transcript_text, reply_text)``.
    """
    with open(audio_input, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
    transcript = transcription.text
    st.session_state.messages.append({"role": "user", "content": transcript})

    request = [{"role": "user", "content": f"{text_input}\n\n{transcript}"}]
    completion = client.chat.completions.create(model="gpt-4o-2024-05-13", messages=request)
    return_text = completion.choices[0].message.content

    # Persist the exchange next to the app as <timestamp>_<transcript>.md.
    md_name = generate_filename(transcript, "md")
    with open(md_name, "w", encoding="utf-8") as md_file:
        md_file.write("\n\n".join((text_input, return_text)))

    st.session_state.messages.append({"role": "assistant", "content": return_text})
    return transcript, return_text
 def process_video(video_path, prompt):
     """Describe a video by sending sampled frames and its transcript to the chat model.

     Frames come from ``process_video_frames`` as base64 strings. The video
     file itself is passed to the ``whisper-1`` transcription endpoint, and
     both are combined into a single user message for ``gpt-4o-2024-05-13``.

     Args:
         video_path: Path of the video file.
         prompt: Instruction appended after the audio transcription.

     Returns:
         The text content of the first completion choice.
     """
     # The second element (an audio path) is returned by the helper but not used here.
     base64Frames, _audio_path = process_video_frames(video_path)
     with open(video_path, "rb") as video_file:
         transcription = client.audio.transcriptions.create(model="whisper-1", file=video_file)

     frame_parts = [
         {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{frame}', "detail": "low"}}
         for frame in base64Frames
     ]
     text_part = {"type": "text", "text": f"The audio transcription is: {transcription.text}\n\n{prompt}"}
     content = ["These are the frames from the video.", *frame_parts, text_part]
     messages = [{"role": "user", "content": content}]

     response = client.chat.completions.create(model="gpt-4o-2024-05-13", messages=messages)
     return response.choices[0].message.content
 def process_video_frames(video_path, seconds_per_frame=2):
     finally:
         buffer.close()
def extract_python_code(markdown_text):
    """Return the contents of every ```python fenced block in ``markdown_text``.

    Whitespace directly after the opening fence and directly before the
    closing fence is not part of the returned snippets. The result is a list
    of strings in document order, empty when no fenced block is present.
    """
    fence = re.compile(r"```python\s*(.*?)\s*```", re.DOTALL)
    return fence.findall(markdown_text)
+# Speech synthesis
def SpeechSynthesis(result):
    """Render an HTML component that reads ``result`` aloud in the browser.

    The component shows the text in a textarea with a button that passes the
    textarea's value to the browser's ``speechSynthesis`` API.

    Args:
        result: Text to display and speak. Non-string values are converted
            with ``str()``.
    """
    import html

    # The text is interpolated into raw HTML. Escape it so content such as
    # "</textarea>" or "<script>" cannot close the textarea or inject markup.
    # The browser decodes the entities again, so the spoken text is unchanged.
    safe_text = html.escape(str(result))
    documentHTML5 = f'''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud() {{
                const text = document.getElementById("textArea").value;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }}
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">{safe_text}</textarea>
        <br>
        <button onclick="readAloud()">🔊 Read Aloud</button>
    </body>
    </html>
    '''
    components.html(documentHTML5, width=1280, height=300)
+# ArXiv search
def search_arxiv(query):
    """Run ``query`` through the ArXiv RAG Space and return a Markdown report.

    Two endpoints of the ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern``
    Gradio Space are called: ``/update_with_rag_md`` for reference material
    and ``/ask_llm`` for a generated answer. The question heading and the
    references are written to a Markdown file named by ``generate_filename``,
    and the references are appended to ``st.session_state.messages`` as an
    assistant turn.

    Args:
        query: The search text entered by the user.

    Returns:
        When the answer is longer than 10 characters, the question, answer and
        references joined into one string; otherwise just the references.
    """
    # Named rag_client so it does not shadow the module-level OpenAI `client`.
    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # Pass the user's query to the retrieval endpoint. A fixed placeholder
    # message would fetch references unrelated to the question they are
    # saved under.
    response1 = rag_client.predict(
        message=query,
        llm_results_use=5,
        database_choice="Semantic Search",
        llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
        api_name="/update_with_rag_md",
    )
    Question = f'### 🔎 {query}\r\n'
    # NOTE(review): both elements are concatenated as strings below - confirm
    # the endpoint always returns two text outputs.
    References = response1[0]
    References2 = response1[1]
    filename = generate_filename(query, "md")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(Question + References + References2)
    st.session_state.messages.append({"role": "assistant", "content": References + References2})
    response2 = rag_client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
    # Very short replies are treated as "no answer": only the references are returned.
    if len(response2) > 10:
        Answer = response2
        SpeechSynthesis(Answer)
        results = Question + '\r\n' + Answer + '\r\n' + References + '\r\n' + References2
        return results
    return References + References2
+# Glossary data
# Glossary layout: category heading -> topic heading -> list of role-play
# prompts shown under that topic by display_glossary_grid().
roleplaying_glossary = {
    "🤖 AI Concepts": {
        "MoE (Mixture of Experts) 🧠": [
            "As a leading AI health researcher, provide an overview of MoE, MAS, memory, and mirroring in healthcare applications.",
            "Explain how MoE and MAS can be leveraged to create AGI and AMI systems for healthcare, as an AI architect.",
        ],
        "Multi Agent Systems (MAS) 🤝": [
            "As a renowned MAS researcher, describe the key characteristics of distributed, autonomous, and cooperative MAS.",
            "Discuss how MAS is applied in robotics, simulations, and decentralized problem-solving, as an AI engineer.",
        ],
    },
    "🛠️ AI Tools & Platforms": {
        "ChatDev 💬": [
            "As a chatbot developer, ask about the features and capabilities ChatDev offers for building conversational AI.",
            "Inquire about the pre-built assets, integrations, and multi-platform support in ChatDev, as a product manager.",
        ],
    },
}
def display_glossary_grid(roleplaying_glossary):
    """Render the glossary as one row of columns per category.

    Each category becomes a level-3 heading followed by one column per topic.
    Every prompt in a topic is shown in bold with three small links: an
    in-app ``/?q=`` search, Wikipedia, and Google.

    Args:
        roleplaying_glossary: Mapping of category -> mapping of topic -> list
            of prompt strings.
    """
    search_urls = {
        "🚀🌌ArXiv": lambda k: f"/?q={quote(k)}",
        "📖": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
        "🔍": lambda k: f"https://www.google.com/search?q={quote(k)}"
    }
    for category, details in roleplaying_glossary.items():
        st.write(f"### {category}")
        if not details:
            # st.columns() rejects a column count of 0, so a category with
            # no topics gets a heading only.
            continue
        cols = st.columns(len(details))
        for col, (game, terms) in zip(cols, details.items()):
            with col:
                st.markdown(f"#### {game}")
                for term in terms:
                    links_md = ' '.join(f"[{emoji}]({url(term)})" for emoji, url in search_urls.items())
                    st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)
+# File operations
def create_zip_of_files(files, zip_name="assets.zip"):
    """Bundle ``files`` into a deflate-compressed zip archive.

    Args:
        files: Iterable of paths to add, stored under the names given.
        zip_name: Path of the archive to create; an existing file is
            overwritten. Defaults to ``"assets.zip"`` in the working directory.

    Returns:
        ``zip_name``, so the result can be passed straight to a download helper.
    """
    archive_path = os.path.abspath(zip_name)
    # ZIP_DEFLATED instead of the default ZIP_STORED: the archive is later
    # base64-inlined into the page, so a smaller archive means a smaller page.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            # Never add the archive to itself; it is open for writing right now.
            if os.path.abspath(file) == archive_path:
                continue
            zipf.write(file)
    return zip_name
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads ``zip_file`` via an inline data URI.

    Args:
        zip_file: Path of the zip archive to embed.

    Returns:
        An ``<a>`` element whose ``href`` is the base64-encoded archive and
        whose suggested download name is the archive's base name.
    """
    import html

    with open(zip_file, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode()
    # Suggest only the file name, not any directory part of the path, and
    # escape it because it is placed inside a quoted HTML attribute.
    download_name = html.escape(os.path.basename(zip_file))
    return f'<a href="data:application/zip;base64,{b64}" download="{download_name}">Download All</a>'
 # Sidebar
 st.sidebar.subheader("Gallery Settings")
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
+# File sidebar
def FileSidebar():
    """Draw the sidebar file manager for saved Markdown files.

    Lists ``*.md`` files in the working directory whose name without the
    extension is at least 10 characters long, sorted in reverse order. Two
    bulk buttons delete all listed files or zip them and show a download
    link. Each file then gets its own row: render as Markdown, download
    link, show raw contents, delete.
    """
    md_files = sorted(
        (path for path in glob.glob("*.md") if len(os.path.splitext(path)[0]) >= 10),
        key=lambda path: (os.path.splitext(path)[1], path),
        reverse=True,
    )

    delete_col, download_col = st.sidebar.columns(2)
    with delete_col:
        if st.button("🗑 Delete All"):
            for path in md_files:
                os.remove(path)
            st.rerun()
    with download_col:
        if st.button("⬇️ Download"):
            archive = create_zip_of_files(md_files)
            st.sidebar.markdown(get_zip_download_link(archive), unsafe_allow_html=True)

    for path in md_files:
        view_col, link_col, open_col, remove_col = st.sidebar.columns([1, 6, 1, 1])
        with view_col:
            if st.button("🌐", key=f"md_{path}"):
                with open(path, "r", encoding='utf-8') as handle:
                    st.markdown(handle.read())
        with link_col:
            # NOTE(review): rendered without unsafe_allow_html - if
            # get_download_link returns raw HTML, confirm it displays as intended.
            st.markdown(get_download_link(path, "text/markdown", path))
        with open_col:
            if st.button("📂", key=f"open_{path}"):
                with open(path, "r", encoding='utf-8') as handle:
                    st.text_area("File Contents", handle.read(), height=300)
        with remove_col:
            if st.button("🗑", key=f"delete_{path}"):
                os.remove(path)
                st.rerun()
+FileSidebar()
 # Tabs
+tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎", "Glossary 📖"])
+(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search, tab_glossary) = tabs
 with tab_camera:
     st.header("Camera Snap 📷")
 with tab_download:
     st.header("Download PDFs 📥")
+    if st.button("Examples 📚"):
+        example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703"]
+        st.session_state['pdf_urls'] = "\n".join(example_urls)
+    url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
+    if st.button("Robo-Download 🤖"):
         urls = url_input.strip().split("\n")
         progress_bar = st.progress(0)
         for idx, url in enumerate(urls):
         elif file.endswith('.mp4'):
             st.video(file)
# Search tab: any non-empty text in the box is sent to search_arxiv() and the
# returned Markdown is rendered below it.
with tab_search:
    st.header("ArXiv Search 🔎")
    if query := st.text_input("Search ArXiv", ""):
        result = search_arxiv(query)
        st.markdown(result)
# Glossary tab: static grid built from the module-level roleplaying_glossary.
with tab_glossary:
    st.header("Glossary 📖")
    display_glossary_grid(roleplaying_glossary=roleplaying_glossary)
 # Update gallery in sidebar
 def update_gallery():
     container = st.session_state['asset_gallery_container']
 # Sidebar history: one line per non-empty entry recorded in session state.
 st.sidebar.subheader("History 📜")
 for entry in st.session_state.get("history", []):
     if not entry:
         continue
     st.sidebar.write(entry)
+# Chatbot
# Chat input: each submitted prompt is added to the running conversation, the
# whole conversation is sent to the model, and the streamed reply is shown.
if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        completion = client.chat.completions.create(model="gpt-4o-2024-05-13", messages=st.session_state.messages, stream=True)
        # One placeholder that is overwritten as text arrives. Calling
        # st.write() per chunk would append a new, longer copy of the reply
        # to the page for every chunk received.
        reply_box = st.empty()
        response = ""
        for chunk in completion:
            # Guard against stream events that carry no choices, so the
            # [0] index below cannot raise.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                response += piece
                reply_box.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})