gaur3009 commited on
Commit
4543af6
·
verified ·
1 Parent(s): 67d5453

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -59
app.py CHANGED
@@ -1,95 +1,127 @@
1
  import gradio as gr
2
- import asyncio
3
- from functools import lru_cache
4
  from search import search_google
 
 
 
5
  from llm import generate_answer
6
- from memory import ConversationMemory
7
- from utils import async_timeout
 
8
 
9
- # Initialize conversation memory
10
- memory = ConversationMemory()
11
 
12
- @async_timeout(30) # Timeout after 30 seconds
13
- async def ask_agent(question, progress=gr.Progress()):
14
- # Track progress
15
- progress(0.1, desc="πŸ“‘ Searching the web...")
 
 
 
 
 
16
 
17
- # Retrieve conversation context
18
- context = memory.get_context()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Search for information (with caching)
21
- search_results = await cached_search_async(question, num_results=5)
22
 
23
  if not search_results:
24
- return "I couldn't find any relevant information about that. Could you try rephrasing your question?"
25
-
26
- # Track progress
27
- progress(0.4, desc="🧠 Processing information...")
28
-
29
- # Generate human-like response
30
- answer = await generate_answer(
31
- question=question,
32
- context=context,
33
- search_results=search_results
34
- )
35
-
36
- # Update conversation history
37
- memory.add_exchange(question, answer['response'])
38
-
39
- # Format response with sources
40
- formatted_response = f"""
41
- πŸ€– **Assistant**: {answer['response']}\n
42
- πŸ” **Sources I used**:\n
43
- """
44
- for source in answer['sources']:
45
- formatted_response += f"- [{source['title']}]({source['url']})\n"
46
-
47
- # Track progress
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  progress(1.0, desc="βœ… Response ready")
49
- return formatted_response
50
 
51
- # Cached async search
52
- @lru_cache(maxsize=100)
53
- def cached_search_async(query, num_results=5):
54
- return search_google(query, num_results)
55
-
56
- # Gradio chat interface with progress tracking
57
- with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
58
  gr.Markdown("""
59
- <div style="text-align: center;">
60
- <h1>🧠 AI Research Assistant</h1>
61
- <p>I can help you find information on any topic!</p>
62
  </div>
63
  """)
64
 
65
  chatbot = gr.Chatbot(height=400, bubble_full_width=False)
66
  msg = gr.Textbox(label="Your Question", placeholder="Ask me anything...")
67
- clear = gr.Button("Clear History")
68
  status = gr.Textbox("", label="Status", interactive=False)
69
 
70
  async def respond(message, chat_history):
71
- # Create progress tracker
72
  tracker = []
73
-
74
- # Wrap in try/except for better error handling
75
  try:
76
- bot_message = await ask_agent(
77
- message,
78
  progress=lambda p, d, t=tracker: tracker.append((p, d))
79
- )
80
 
81
  # Update status
82
  if tracker:
83
  status.value = tracker[-1][1]
84
  except Exception as e:
85
- bot_message = f"⚠️ Sorry, I encountered an error: {str(e)[:100]}"
86
  status.value = "Error occurred"
87
 
88
- chat_history.append((message, bot_message))
89
  return "", chat_history
90
 
91
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
92
- clear.click(lambda: (memory.clear(), None), None, chatbot, queue=False)
93
 
94
  if __name__ == "__main__":
95
  demo.queue(concurrency_count=4).launch()
 
1
  import gradio as gr
 
 
2
  from search import search_google
3
+ from scraper import scrape_url
4
+ from summarizer import summarize_text
5
+ from rag import VectorStore
6
  from llm import generate_answer
7
+ import asyncio
8
+ from functools import lru_cache
9
+ from concurrent.futures import ThreadPoolExecutor
10
 
11
# Module-level vector store shared by every request.
vs = VectorStore()

# Scrape the text of a page, memoising up to 100 recently-seen URLs so
# repeat queries do not re-fetch the same pages.
@lru_cache(maxsize=100)
def cached_scrape(url):
    """Return the scraped text for *url*, served from cache when possible."""
    page_text = scrape_url(url)
    return page_text
18
+
19
async def process_search_results(query):
    """Search the web for *query* and scrape every hit's page text in parallel.

    Returns:
        (search_results, texts): the search hits (list of dicts with at least
        'url' and 'title' keys — presumably; confirm against search_google)
        and the scraped text for each hit, in the same order so index i of
        one matches index i of the other. Returns (None, None) when the
        search comes back empty.
    """
    # Step 1: Search Google for URLs.
    # NOTE(review): search_google is a blocking call running on the event
    # loop; consider run_in_executor if it proves slow.
    search_results = search_google(query, num_results=5)

    if not search_results:
        return None, None

    # Step 2: Scrape text from each URL in parallel worker threads.
    with ThreadPoolExecutor(max_workers=5) as executor:
        loop = asyncio.get_running_loop()
        scrape_tasks = [
            loop.run_in_executor(executor, cached_scrape, result['url'])
            for result in search_results
        ]
        # BUG FIX: without return_exceptions=True a single unreachable page
        # raised out of gather() and aborted the whole query, discarding the
        # other four results.
        texts = await asyncio.gather(*scrape_tasks, return_exceptions=True)

    # Replace failures (and None returns from the scraper) with "" so the
    # positional alignment with search_results is preserved downstream.
    texts = ["" if isinstance(t, BaseException) or not t else t for t in texts]

    return search_results, texts
37
+
38
async def ask_agent(question, progress=gr.Progress()):
    """Answer *question* via search → scrape → summarise → RAG-retrieve → LLM.

    Args:
        question: the user's free-text question.
        progress: a callable invoked as progress(fraction, desc=...) to
            report stage updates (Gradio's tracker by default).

    Returns:
        A markdown-formatted answer listing the sources used, or a fallback
        message when the web search yields nothing.
    """
    progress(0.1, desc="🔍 Searching the web...")

    # Steps 1-2: search and scrape in parallel (texts align with results).
    search_results, texts = await process_search_results(question)

    if not search_results:
        return "I couldn't find any relevant information. Please try a different question."

    progress(0.3, desc="📚 Processing content...")

    # Step 3: Summarize each scraped text in parallel worker threads.
    with ThreadPoolExecutor(max_workers=3) as executor:
        loop = asyncio.get_running_loop()
        summarize_tasks = [
            loop.run_in_executor(executor, summarize_text, text, 100)
            for text in texts
        ]
        summaries = await asyncio.gather(*summarize_tasks)

    # Step 4: (Re)populate the vector store for THIS query only.
    # BUG FIX: summaries previously accumulated across questions, so the
    # indices returned by vs.retrieve() no longer mapped onto this query's
    # five search_results — wrong source links, and eventually IndexError.
    vs.clear()
    vs.add_texts(summaries)

    progress(0.6, desc="🧠 Finding relevant information...")

    # Step 5: Retrieve the top-3 most relevant summaries for the question.
    relevant_texts, indices = vs.retrieve(question, top_k=3)
    # Defensive: drop any index the store returns that does not map onto a
    # search result, instead of crashing while formatting sources.
    indices = [i for i in indices if 0 <= i < len(search_results)]
    context = "\n\n".join(relevant_texts)

    progress(0.8, desc="💡 Generating answer...")

    # Step 6: Generate the final answer from the retrieved context.
    answer = generate_answer(context, question)

    # Format the response, citing the sources that fed the answer first.
    response = f"### 🤖 Assistant\n{answer}\n\n"
    response += "### 🔍 Sources Used in This Answer:\n"
    for idx in indices:
        result = search_results[idx]
        response += f"- [{result['title']}]({result['url']})\n"

    # Then list the remaining hits as further reading.
    other_indices = [i for i in range(len(search_results)) if i not in indices]
    if other_indices:
        response += "\n### 📚 Other Useful Sources:\n"
        for idx in other_indices:
            result = search_results[idx]
            response += f"- [{result['title']}]({result['url']})\n"

    progress(1.0, desc="✅ Response ready")
    return response
91
 
92
# Gradio interface with progress tracking
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 800px}") as demo:
    gr.Markdown("""
    <div style="text-align: center">
        <h1>🔍 AI Research Assistant</h1>
        <p>I'll search the web and summarize information for you!</p>
    </div>
    """)

    chatbot = gr.Chatbot(height=400, bubble_full_width=False)
    msg = gr.Textbox(label="Your Question", placeholder="Ask me anything...")
    clear = gr.Button("Clear Conversation")
    status = gr.Textbox("", label="Status", interactive=False)

    async def respond(message, chat_history):
        """Chat callback: run the agent and append the exchange to history."""
        tracker = []
        try:
            # BUG FIX: ask_agent reports progress as progress(p, desc=...),
            # so the callable injected here must accept a `desc` KEYWORD.
            # The previous `lambda p, d, ...` raised TypeError on the very
            # first progress call, sending every request down the error path.
            response = await ask_agent(
                message,
                progress=lambda p, desc="", t=tracker: t.append((p, desc))
            )

            # NOTE(review): assigning status.value server-side does not push
            # an update to the browser in Gradio — the box only changes on a
            # later render; return it as an output to update live. Confirm.
            if tracker:
                status.value = tracker[-1][1]
        except Exception as e:
            response = f"⚠️ Sorry, I encountered an error: {str(e)[:100]}"
            status.value = "Error occurred"

        chat_history.append((message, response))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: (vs.clear(), None), None, chatbot, queue=False)

if __name__ == "__main__":
    # NOTE(review): queue(concurrency_count=...) was removed in Gradio 4
    # (use default_concurrency_limit); kept as-is for the pinned version.
    demo.queue(concurrency_count=4).launch()