Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Sleeping

App Files Files Community

awacke1 committed on Dec 19, 2024

Commit

2b0f7f4

verified ·

1 Parent(s): cc67713

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -127

app.py CHANGED Viewed

@@ -6,18 +6,26 @@ from sklearn.metrics.pairwise import cosine_similarity
 import torch
 import json
 import os
 from pathlib import Path
 from datetime import datetime
 import edge_tts
 import asyncio
 import base64
 import streamlit.components.v1 as components
 # Page configuration
 st.set_page_config(
-    page_title="Video Search with Speech",
     page_icon="🎥",
-    layout="wide"
 )
 # Initialize session state
@@ -25,8 +33,21 @@ if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
 if 'last_voice_input' not in st.session_state:
     st.session_state['last_voice_input'] = ""
-# Initialize the speech component
 speech_component = components.declare_component("speech_recognition", path="mycomponent")
 class VideoSearch:
@@ -35,59 +56,87 @@ class VideoSearch:
         self.load_dataset()
     def fetch_dataset_rows(self):
-        """Fetch dataset from Hugging Face API"""
-        import requests
-        # Fetch first rows from the dataset
-        url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
-        response = requests.get(url)
-        if response.status_code == 200:
-            data = response.json()
-            # Extract the rows from the response
-            rows = data.get('rows', [])
-            return pd.DataFrame(rows)
-        else:
-            st.error(f"Error fetching dataset: {response.status_code}")
-            return None
-    def get_dataset_splits(self):
-        """Get available dataset splits"""
-        import requests
-        url = "https://datasets-server.huggingface.co/splits?dataset=omegalabsinc%2Fomega-multimodal"
-        response = requests.get(url)
-        if response.status_code == 200:
-            splits_data = response.json()
-            return splits_data
-        else:
-            st.error(f"Error fetching splits: {response.status_code}")
             return None
     def load_dataset(self):
-        """Load the Omega Multimodal dataset"""
         try:
-            # Fetch dataset from Hugging Face API
             self.dataset = self.fetch_dataset_rows()
             if self.dataset is not None:
-                # Get dataset splits info
-                splits_info = self.get_dataset_splits()
-                if splits_info:
-                    st.sidebar.write("Available splits:", splits_info)
                 self.prepare_features()
             else:
                 self.create_dummy_data()
         except Exception as e:
             st.error(f"Error loading dataset: {e}")
             self.create_dummy_data()
     def prepare_features(self):
-        """Prepare and cache embeddings"""
-        # Convert string representations of embeddings back to numpy arrays
         try:
             self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
                                         for e in self.dataset.video_embed])
@@ -95,38 +144,17 @@ class VideoSearch:
                                        for e in self.dataset.description_embed])
         except Exception as e:
             st.error(f"Error preparing features: {e}")
-            # Create random embeddings as fallback
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
-    def create_dummy_data(self):
-        """Create dummy data for testing"""
-        self.dataset = pd.DataFrame({
-            'video_id': [f'video_{i}' for i in range(10)],
-            'youtube_id': ['dQw4w9WgXcQ'] * 10,
-            'description': ['Sample video description'] * 10,
-            'views': [1000] * 10,
-            'start_time': [0] * 10,
-            'end_time': [60] * 10
-        })
-        # Create dummy embeddings
-        self.video_embeds = np.random.randn(10, 384)  # Match model dimensions
-        self.text_embeds = np.random.randn(10, 384)
     def search(self, query, top_k=5):
-        """Search videos using query"""
         query_embedding = self.text_model.encode([query])[0]
-        # Compute similarities
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
-        # Combine similarities
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
-        # Get top results
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
@@ -140,30 +168,76 @@ class VideoSearch:
                 'relevance_score': float(combined_sims[idx]),
                 'views': self.dataset.iloc[idx]['views']
             })
         return results
 async def generate_speech(text, voice="en-US-AriaNeural"):
     """Generate speech using Edge TTS"""
     if not text.strip():
         return None
-    communicate = edge_tts.Communicate(text, voice)
-    audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
-    await communicate.save(audio_file)
-    return audio_file
 def main():
-    st.title("🎥 Video Search with Speech Recognition")
-    # Initialize video search
     search = VideoSearch()
-    # Create tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "💾 History"])
     with tab1:
-        st.subheader("Search Videos")
         # Text search
         query = st.text_input("Enter your search query:")
@@ -203,74 +277,64 @@ def main():
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
-                                # Cleanup audio file
-                                if os.path.exists(audio_file):
-                                    os.remove(audio_file)
     with tab2:
-        st.subheader("Voice Input")
-        # Speech recognition component
-        voice_input = speech_component()
-        if voice_input and voice_input != st.session_state['last_voice_input']:
-            st.session_state['last_voice_input'] = voice_input
-            st.markdown("**Transcribed Text:**")
-            st.write(voice_input)
-            if st.button("🔍 Search Videos"):
-                results = search.search(voice_input, num_results)
-                st.session_state['search_history'].append({
-                    'query': voice_input,
-                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                    'results': results
-                })
-                for i, result in enumerate(results, 1):
-                    with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
-                        st.write(result['description'])
-                        if result['youtube_id']:
-                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
     with tab3:
-        st.subheader("Search History")
-        if st.button("🗑️ Clear History"):
-            st.session_state['search_history'] = []
-            st.experimental_rerun()
-        for i, entry in enumerate(reversed(st.session_state['search_history'])):
-            with st.expander(f"Query: {entry['query']} ({entry['timestamp']})", expanded=False):
-                st.markdown(f"**Original Query:** {entry['query']}")
-                st.markdown(f"**Time:** {entry['timestamp']}")
-                for j, result in enumerate(entry['results'], 1):
-                    st.markdown(f"**Result {j}:**")
-                    st.write(result['description'])
-                    if result['youtube_id']:
-                        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
-    # Sidebar configuration
     with st.sidebar:
-        st.subheader("⚙️ Configuration")
-        st.markdown("**Video Search Settings**")
-        st.slider("Default Results:", 1, 10, 5, key="default_results")
-        st.markdown("**Voice Settings**")
         st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")
-        st.markdown("**Model Settings**")
-        st.selectbox("Text Embedding Model:",
-                    ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2"],
-                    key="embedding_model")
-        if st.button("📥 Download Search History"):
-            # Convert history to JSON
-            history_json = json.dumps(st.session_state['search_history'], indent=2)
-            b64 = base64.b64encode(history_json.encode()).decode()
-            href = f'<a href="data:file/json;base64,{b64}" download="search_history.json">Download JSON</a>'
-            st.markdown(href, unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 import torch
 import json
 import os
+import glob
 from pathlib import Path
 from datetime import datetime
 import edge_tts
 import asyncio
 import base64
+import requests
+import plotly.graph_objects as go
+from gradio_client import Client
+from collections import defaultdict
+from bs4 import BeautifulSoup
+from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
 # Page configuration
 st.set_page_config(
+    page_title="Video Search & Research Assistant",
     page_icon="🎥",
+    layout="wide",
+    initial_sidebar_state="auto",
 )
 # Initialize session state
     st.session_state['search_history'] = []
 if 'last_voice_input' not in st.session_state:
     st.session_state['last_voice_input'] = ""
+if 'transcript_history' not in st.session_state:
+    st.session_state['transcript_history'] = []
+if 'should_rerun' not in st.session_state:
+    st.session_state['should_rerun'] = False
+# Custom styling
+st.markdown("""
+<style>
+    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
+    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
+    .stButton>button { margin-right: 0.5rem; }
+</style>
+""", unsafe_allow_html=True)
+# Initialize components
 speech_component = components.declare_component("speech_recognition", path="mycomponent")
 class VideoSearch:
         self.load_dataset()
     def fetch_dataset_rows(self):
+        """Fetch preview rows of the Omega Multimodal dataset as a DataFrame.
+
+        Sources are tried in this order: the local ``dataset_cache.json``
+        file, the Hugging Face dataset viewer ``/first-rows`` endpoint (the
+        raw rows are cached on success), and ``example_data.json`` when the
+        API answers 200 without a ``rows`` key. Progress and debug details
+        are written to the Streamlit page.
+
+        Returns:
+            A pandas DataFrame with one record per dataset row, or None when
+            no source could be loaded. Failures are reported with
+            ``st.error`` instead of being raised.
+        """
+        cache_file = "dataset_cache.json"
+        example_file = "example_data.json"
+        url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
+        def to_frame(rows):
+            # The dataset viewer wraps every record as
+            # {"row_idx": ..., "row": {...}, "truncated_cells": [...]}.
+            # Unwrap it so the columns are the dataset fields themselves;
+            # records that are already flat pass through unchanged.
+            if not isinstance(rows, list):
+                return pd.DataFrame(rows)
+            records = [
+                r['row'] if isinstance(r, dict) and isinstance(r.get('row'), dict) else r
+                for r in rows
+            ]
+            return pd.DataFrame(records)
+        try:
+            # First try to load from local cache
+            if os.path.exists(cache_file):
+                st.info("Loading from cache...")
+                try:
+                    with open(cache_file, 'r', encoding='utf-8') as f:
+                        return to_frame(json.load(f))
+                except (OSError, ValueError) as e:
+                    # An unreadable or corrupt cache must not block a fresh fetch.
+                    st.warning(f"Ignoring unreadable cache file: {e}")
+            st.info("Fetching from Hugging Face API...")
+            # Add debug output
+            st.write(f"Requesting URL: {url}")
+            response = requests.get(url, timeout=30)
+            st.write(f"Response status: {response.status_code}")
+            if response.status_code != 200:
+                st.error(f"API request failed with status code: {response.status_code}")
+                if response.status_code == 404:
+                    st.error("Dataset not found - check the dataset name and configuration")
+                try:
+                    st.write("Error details:", response.json())
+                except ValueError:
+                    # The error body was not JSON; show it verbatim instead.
+                    st.write("Raw response:", response.text)
+                return None
+            data = response.json()
+            # Debug output
+            st.write("Response structure:", list(data.keys()))
+            if 'rows' in data:
+                rows = data['rows']
+                # Cache the raw response rows; caching is best-effort, so a
+                # read-only file system must not discard the fetched data.
+                try:
+                    with open(cache_file, 'w', encoding='utf-8') as f:
+                        json.dump(rows, f)
+                except OSError as e:
+                    st.warning(f"Could not write dataset cache: {e}")
+                df = to_frame(rows)
+                # Debug output
+                st.write("DataFrame columns:", list(df.columns))
+                st.write("Number of rows:", len(df))
+                return df
+            st.error("No 'rows' found in API response")
+            st.write("API Response:", data)
+            # Try loading example data
+            if os.path.exists(example_file):
+                st.info("Loading example data...")
+                with open(example_file, 'r', encoding='utf-8') as f:
+                    example_data = json.load(f)
+                return pd.DataFrame(example_data)
+            return None
+        except Exception as e:
+            # Top-level boundary: surface the failure in the UI and let the
+            # caller fall back to placeholder data.
+            st.error(f"Error fetching dataset: {str(e)}")
+            import traceback
+            st.write("Traceback:", traceback.format_exc())
             return None
     def load_dataset(self):
+        """Load the dataset and its embeddings, falling back to placeholders.
+
+        Calls ``fetch_dataset_rows`` and, when it yields a DataFrame,
+        ``prepare_features``. If no rows are available or either step raises,
+        the error is shown with ``st.error`` and placeholder data is loaded
+        so the rest of the app can still run.
+        """
+        try:
+            self.dataset = self.fetch_dataset_rows()
+            if self.dataset is not None:
+                self.prepare_features()
+                return
+        except Exception as e:
+            st.error(f"Error loading dataset: {e}")
+        self._load_placeholder_data()
+    def _load_placeholder_data(self):
+        """Populate ``dataset`` and both embedding matrices with dummy rows."""
+        # Prefer the class's own create_dummy_data when it is defined; this
+        # commit removes that method, so do not assume it exists.
+        fallback = getattr(self, 'create_dummy_data', None)
+        if callable(fallback):
+            fallback()
+            return
+        num_rows = 10
+        self.dataset = pd.DataFrame({
+            'video_id': [f'video_{i}' for i in range(num_rows)],
+            'youtube_id': ['dQw4w9WgXcQ'] * num_rows,
+            'description': ['Sample video description'] * num_rows,
+            'views': [1000] * num_rows,
+            'start_time': [0] * num_rows,
+            'end_time': [60] * num_rows,
+        })
+        # Same width (384) as the random fallback in prepare_features.
+        self.video_embeds = np.random.randn(num_rows, 384)
+        self.text_embeds = np.random.randn(num_rows, 384)
     def prepare_features(self):
         try:
             self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
                                         for e in self.dataset.video_embed])
                                        for e in self.dataset.description_embed])
         except Exception as e:
             st.error(f"Error preparing features: {e}")
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
     def search(self, query, top_k=5):
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
                 'relevance_score': float(combined_sims[idx]),
                 'views': self.dataset.iloc[idx]['views']
             })
         return results
+def perform_arxiv_search(query, vocal_summary=True, extended_refs=False):
+    """Run an Arxiv RAG search, render the answer, and optionally speak it.
+
+    Args:
+        query: Research question sent to the remote Gradio Space.
+        vocal_summary: When true, the first 500 characters of the answer are
+            synthesized with ``generate_speech`` and an audio player is shown.
+        extended_refs: Accepted for interface compatibility; currently unused.
+
+    Returns:
+        The markdown string that was rendered, or None when any step failed
+        (the error is shown with ``st.error``).
+    """
+    model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    try:
+        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+        refs = client.predict(query, 20, "Semantic Search", model,
+                            api_name="/update_with_rag_md")[0]
+        response = client.predict(query, model, True, api_name="/ask_llm")
+        result = f"### 🔎 {query}\n\n{response}\n\n{refs}"
+        st.markdown(result)
+        if vocal_summary:
+            # Honor the sidebar "TTS Voice" choice (key "tts_voice"); fall
+            # back to the default voice before the widget has been rendered.
+            voice = st.session_state.get("tts_voice") or "en-US-AriaNeural"
+            audio_file = asyncio.run(generate_speech(response[:500], voice))
+            if audio_file:
+                try:
+                    st.audio(audio_file)
+                finally:
+                    # Remove the temporary MP3 even if rendering fails.
+                    if os.path.exists(audio_file):
+                        os.remove(audio_file)
+        return result
+    except Exception as e:
+        st.error(f"Error in Arxiv search: {e}")
+        return None
 async def generate_speech(text, voice="en-US-AriaNeural"):
+    """Synthesize *text* to an MP3 file with Edge TTS.
+
+    Args:
+        text: Text to speak. None, empty, or whitespace-only text is skipped.
+        voice: Edge TTS voice name.
+
+    Returns:
+        Path of the MP3 written to the current working directory, or None
+        when there is nothing to speak or synthesis fails (the error is shown
+        with ``st.error``). Callers in this file delete the file after
+        playback.
+    """
+    if not text or not text.strip():
+        return None
+    # Microsecond resolution keeps two requests made within the same second
+    # from writing to (and later deleting) the same file.
+    audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.mp3"
+    try:
+        communicate = edge_tts.Communicate(text, voice)
+        await communicate.save(audio_file)
+        return audio_file
+    except Exception as e:
+        # Do not leave a partially written file behind.
+        if os.path.exists(audio_file):
+            os.remove(audio_file)
+        st.error(f"Error generating speech: {e}")
+        return None
+def process_audio_input(audio_bytes):
+    """Acknowledge a recording captured by the audio recorder widget.
+
+    The bytes are written to a temporary ``.wav`` file in the working
+    directory, a success message is shown, and the file is removed again.
+    No transcription is performed yet.
+
+    Args:
+        audio_bytes: Raw recording bytes, or a falsy value when nothing was
+            recorded.
+
+    Returns:
+        True when a recording was processed, False when ``audio_bytes`` is
+        empty or None.
+    """
+    if not audio_bytes:
+        return False
+    # Save temporary file
+    audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+    try:
+        with open(audio_path, "wb") as f:
+            f.write(audio_bytes)
+        # Here you would typically use a speech-to-text service
+        # For now, we'll just acknowledge the recording
+        st.success("Audio recorded successfully!")
+    finally:
+        # Cleanup runs even when writing or rendering fails.
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+    return True
 def main():
+    st.title("🎥 Video Search & Research Assistant")
+    # Initialize search
     search = VideoSearch()
+    # Create main tabs
+    tab1, tab2, tab3 = st.tabs(["🔍 Video Search", "🎙️ Voice & Audio", "📚 Arxiv Research"])
     with tab1:
+        st.subheader("Search Video Dataset")
         # Text search
         query = st.text_input("Enter your search query:")
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
+                                os.remove(audio_file)
     with tab2:
+        st.subheader("Voice Input & Audio Recording")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("🎙️ Speech Recognition")
+            voice_input = speech_component()
+            if voice_input and voice_input != st.session_state['last_voice_input']:
+                st.session_state['last_voice_input'] = voice_input
+                st.markdown("**Transcribed Text:**")
+                st.write(voice_input)
+                if st.button("🔍 Search Videos"):
+                    results = search.search(voice_input, num_results)
+                    for i, result in enumerate(results, 1):
+                        with st.expander(f"Result {i}", expanded=i==1):
+                            st.write(result['description'])
+                            if result['youtube_id']:
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
+        with col2:
+            st.write("🎵 Audio Recorder")
+            audio_bytes = audio_recorder()
+            if audio_bytes:
+                process_audio_input(audio_bytes)
     with tab3:
+        st.subheader("Arxiv Research")
+        arxiv_query = st.text_input("🔍 Research Query:")
+        col1, col2 = st.columns(2)
+        with col1:
+            vocal_summary = st.checkbox("Generate Audio Summary", value=True)
+        with col2:
+            extended_refs = st.checkbox("Include Extended References", value=False)
+        if st.button("🔍 Search Arxiv") and arxiv_query:
+            perform_arxiv_search(arxiv_query, vocal_summary, extended_refs)
+    # Sidebar for history and settings
     with st.sidebar:
+        st.subheader("⚙️ Settings & History")
+        if st.button("🗑️ Clear History"):
+            st.session_state['search_history'] = []
+            st.experimental_rerun()
+        st.markdown("### Recent Searches")
+        for entry in reversed(st.session_state['search_history'][-5:]):
+            st.markdown(f"**{entry['timestamp']}**: {entry['query']}")
+        st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")
 if __name__ == "__main__":
     main()