schoolkithub committed
Commit 043cb3a · verified · Parent: 6aeb085

Update app.py

Files changed (1): app.py (+158 -67)

app.py CHANGED
@@ -1,14 +1,18 @@
 import os
+import re
 import gradio as gr
 import requests
 import pandas as pd
 from huggingface_hub import InferenceClient
 from duckduckgo_search import DDGS
 import wikipediaapi
+from bs4 import BeautifulSoup
+import pdfplumber
 
 # ==== CONFIG ====
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 HF_TOKEN = os.getenv("HF_TOKEN")
+GROK_API_KEY = os.getenv("GROK_API_KEY") or "xai-[REDACTED]"
 
 CONVERSATIONAL_MODELS = [
     "deepseek-ai/DeepSeek-LLM",
@@ -18,25 +22,94 @@ CONVERSATIONAL_MODELS = [
 
 wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 ([email protected])")
 
+# ==== UTILITY: Link/file detection ====
+def extract_links(text):
+    url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
+    return url_pattern.findall(text)
+
+def download_file(url, out_dir="tmp_files"):
+    os.makedirs(out_dir, exist_ok=True)
+    filename = url.split("/")[-1].split("?")[0]
+    local_path = os.path.join(out_dir, filename)
+    try:
+        r = requests.get(url, timeout=20)
+        r.raise_for_status()
+        with open(local_path, "wb") as f:
+            f.write(r.content)
+        return local_path
+    except Exception:
+        return None
+
+# ==== File/Link Analyzers ====
+def analyze_file(file_path):
+    if file_path.endswith((".xlsx", ".xls")):
+        try:
+            df = pd.read_excel(file_path)
+            return f"Excel summary: {df.head().to_markdown(index=False)}"
+        except Exception as e:
+            return f"Excel error: {e}"
+    elif file_path.endswith(".csv"):
+        try:
+            df = pd.read_csv(file_path)
+            return f"CSV summary: {df.head().to_markdown(index=False)}"
+        except Exception as e:
+            return f"CSV error: {e}"
+    elif file_path.endswith(".pdf"):
+        try:
+            with pdfplumber.open(file_path) as pdf:
+                first_page = pdf.pages[0].extract_text()
+                return f"PDF text sample: {first_page[:1000]}"
+        except Exception as e:
+            return f"PDF error: {e}"
+    elif file_path.endswith(".txt"):
+        try:
+            with open(file_path, encoding='utf-8') as f:
+                txt = f.read()
+            return f"TXT file sample: {txt[:1000]}"
+        except Exception as e:
+            return f"TXT error: {e}"
+    else:
+        return f"Unsupported file type: {file_path}"
+
+def analyze_webpage(url):
+    try:
+        r = requests.get(url, timeout=15)
+        soup = BeautifulSoup(r.text, "lxml")
+        title = soup.title.string if soup.title else "No title"
+        paragraphs = [p.get_text() for p in soup.find_all("p")]
+        article_sample = "\n".join(paragraphs[:5])
+        return f"Webpage Title: {title}\nContent sample:\n{article_sample[:1200]}"
+    except Exception as e:
+        return f"Webpage error: {e}"
+
 # ==== SEARCH TOOLS ====
 def duckduckgo_search(query):
-    with DDGS() as ddgs:
-        results = [r for r in ddgs.text(query, max_results=3)]
-        return "\n".join([r.get("body", "") for r in results if r.get("body")]) or "No DuckDuckGo results found."
+    try:
+        with DDGS() as ddgs:
+            results = [r for r in ddgs.text(query, max_results=3)]
+            bodies = [r.get("body", "") for r in results if r.get("body")]
+            return "\n".join(bodies) if bodies else None
+    except Exception:
+        return None
 
 def wikipedia_search(query):
-    page = wiki_api.page(query)
-    return page.summary if page.exists() and page.summary else None
+    try:
+        page = wiki_api.page(query)
+        if page.exists() and page.summary:
+            return page.summary
+    except Exception:
+        return None
+    return None
 
-def hf_chat_model(question):
-    last_error = ""
+def llm_conversational(query):
+    last_error = None
     for model_id in CONVERSATIONAL_MODELS:
         try:
             hf_client = InferenceClient(model_id, token=HF_TOKEN)
-            # Try conversational (preferred)
+            # Try conversational if available, else fall back to text_generation
             if hasattr(hf_client, "conversational"):
                 result = hf_client.conversational(
-                    messages=[{"role": "user", "content": question}],
+                    messages=[{"role": "user", "content": query}],
                     max_new_tokens=384,
                 )
                 if isinstance(result, dict) and "generated_text" in result:
@@ -47,79 +120,97 @@ def hf_chat_model(question):
                 return result
             else:
                 continue
-            # Try text_generation as fallback
-            result = hf_client.text_generation(question, max_new_tokens=384)
+            result = hf_client.text_generation(query, max_new_tokens=384)
             if isinstance(result, dict) and "generated_text" in result:
                 return result["generated_text"]
             elif isinstance(result, str):
                 return result
         except Exception as e:
             last_error = f"{model_id}: {e}"
-            continue
-    return f"HF LLM error: {last_error or 'All models failed.'}"
-
-def try_parse_vegetable_list(question):
-    if "vegetable" in question.lower():
-        # Heuristic: find list in question, extract vegetables only
-        import re
-        food_match = re.findall(r"list\s+.*?:\s*([a-zA-Z0-9,\s\-]+)", question)
-        food_str = food_match[0] if food_match else ""
-        foods = [f.strip().lower() for f in food_str.split(",") if f.strip()]
-        # Simple vegetable classifier (expand this list as needed)
-        vegetables = set(["acorns", "broccoli", "celery", "green beans", "lettuce", "peanuts", "sweet potatoes", "zucchini", "corn", "bell pepper"])
-        veg_list = sorted([f for f in foods if f in vegetables])
-        if veg_list:
-            return ", ".join(veg_list)
     return None
 
-def try_extract_first_name(question):
-    # e.g. "first name of the only Malko Competition recipient"
-    if "first name" in question.lower() and "malko" in question.lower():
-        # Use Wikipedia/duckduckgo search if not found
-        return "Vladimir"
-    return None
-
-def try_excel_sum(question, attachments=None):
-    # This is a placeholder: actual code depends on file upload support
-    if "excel" in question.lower() and "sales" in question.lower():
-        # In HF Spaces, the attachments param is not automatically supported.
-        # If your UI supports uploads, read the file, parse food vs. drinks and sum.
-        return "$12562.20"
-    return None
+def is_coding_question(text):
+    # Basic heuristic: mentions code, function, "python", code blocks, etc.
+    code_terms = [
+        "python", "java", "c++", "code", "function", "write a", "script", "algorithm",
+        "bug", "traceback", "error", "output", "compile", "debug"
+    ]
+    if any(term in text.lower() for term in code_terms):
+        return True
+    if re.search(r"```.+```", text, re.DOTALL):
+        return True
+    return False
 
-def try_pitcher_before_after(question):
-    if "pitcher" in question.lower() and "before" in question.lower() and "after" in question.lower():
-        # Without a lookup table or API, fall back to a general answer
-        return "Kaneda, Kawakami"
-    return None
+def grok_completion(question, system_prompt=None):
+    url = "https://api.x.ai/v1/chat/completions"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {GROK_API_KEY}"
+    }
+    payload = {
+        "messages": [
+            {"role": "system", "content": system_prompt or "You are a helpful coding and research assistant."},
+            {"role": "user", "content": question}
+        ],
+        "model": "grok-3-latest",
+        "stream": False,
+        "temperature": 0
+    }
+    try:
+        r = requests.post(url, headers=headers, json=payload, timeout=45)
+        r.raise_for_status()
+        data = r.json()
+        # Extract the assistant's reply
+        return data['choices'][0]['message']['content']
+    except Exception:
+        return None
 
 # ==== SMART AGENT ====
 class SmartAgent:
     def __init__(self):
         pass
 
-    def __call__(self, question: str, attachments=None) -> str:
-        # 1. Specific pattern-based heuristics
-        a = try_parse_vegetable_list(question)
-        if a: return a
-        a = try_extract_first_name(question)
-        if a: return a
-        a = try_excel_sum(question, attachments)
-        if a: return a
-        a = try_pitcher_before_after(question)
-        if a: return a
-
-        # 2. DuckDuckGo for web/now/current questions
-        if any(term in question.lower() for term in ["current", "latest", "2024", "2025", "who is the president", "recent", "live", "now", "today"]):
-            duck_result = duckduckgo_search(question)
-            if duck_result and "No DuckDuckGo" not in duck_result:
-                return duck_result
-        # 3. Wikipedia for factual lookups
-        wiki_result = wikipedia_search(question)
-        if wiki_result:
-            return wiki_result
-        # 4. LLM fallback
-        return hf_chat_model(question)
+    def __call__(self, question: str) -> str:
+        # 1. Handle file/link
+        links = extract_links(question)
+        if links:
+            results = []
+            for url in links:
+                if re.search(r"\.xlsx|\.xls|\.csv|\.pdf|\.txt", url):
+                    local = download_file(url)
+                    if local:
+                        file_analysis = analyze_file(local)
+                        results.append(f"File ({url}):\n{file_analysis}")
+                else:
+                    results.append(analyze_webpage(url))
+            if results:
+                return "\n\n".join(results)
+
+        # 2. Coding or algorithmic problems? Try Grok FIRST
+        if is_coding_question(question):
+            grok_response = grok_completion(question)
+            if grok_response:
+                return f"[Grok] {grok_response}"
+
+        # 3. DuckDuckGo for web knowledge
+        result = duckduckgo_search(question)
+        if result:
+            return result
+        # 4. Wikipedia for encyclopedic queries
+        result = wikipedia_search(question)
+        if result:
+            return result
+        # 5. Grok again for hard/reasoning/general (if not already tried)
+        if not is_coding_question(question):
+            grok_response = grok_completion(question)
+            if grok_response:
+                return f"[Grok] {grok_response}"
+
+        # 6. Fallback to LLM conversational
+        result = llm_conversational(question)
+        if result:
+            return result
+        return "No answer could be found by available tools."
 
 # ==== SUBMISSION LOGIC ====
 def run_and_submit_all(profile: gr.OAuthProfile | None):
 
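For reference, a minimal sketch (not part of the commit) of how the two new link helpers compose; the module name `app` and the URL are assumptions for illustration only:

    # Hypothetical usage, assuming app.py is importable as `app`.
    from app import extract_links, download_file

    question = "Sum column B of https://example.com/data/sales.xlsx for Q1"
    links = extract_links(question)   # -> ["https://example.com/data/sales.xlsx"]
    local = download_file(links[0])   # writes tmp_files/sales.xlsx; None on any network error
    print(local)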
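is_coding_question is a deliberately broad keyword heuristic, so common words like "error" or "output" will also route some non-coding questions to Grok first. A few illustrative cases:

    from app import is_coding_question

    assert is_coding_question("Write a Python function to reverse a list")  # keyword "python"
    assert is_coding_question("```print(1)```")                             # fenced code block
    assert not is_coding_question("Who painted the Mona Lisa?")
    assert is_coding_question("What was the output of the vote?")           # false positive on "output"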
 
 
 
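grok_completion posts an OpenAI-style chat payload to xAI's /v1/chat/completions endpoint and returns only the assistant's text, or None on any failure, which is what lets the agent fall through to the next tool. A usage sketch (the key must come from the GROK_API_KEY environment variable):

    from app import grok_completion

    answer = grok_completion(
        "What is the time complexity of binary search?",
        system_prompt="Answer in one sentence.",  # optional; a generic default is used otherwise
    )
    print(answer or "Grok unreachable; SmartAgent falls back to its search tools")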