Spaces:

bchander
/

agents_course

Sleeping

App Files Files Community

Bhanu-Chander-ABB committed on Jun 11

Commit

c803551

1 Parent(s): d8028fc

Phi-4

Browse files

Files changed (1) hide show

app.py +49 -2

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langchain.tools import tool
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain.agents import initialize_agent, AgentType
 from bs4 import BeautifulSoup
 ## # Load environment variables from .env file
 # --- Constants ---
@@ -218,6 +219,50 @@ def web_scrape_tool(url: str) -> str:
     except Exception as e:
         return f"error: {e}"
 ##-- Tool Discovery ---
 # Use @tool for each function.
 # Use get_all_tools() to auto-discover all decorated tools.
@@ -235,7 +280,9 @@ tools_list = [
     image_caption,
     ocr_image,
     classify_image,
-    web_scrape_tool
 ]
 tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
@@ -284,7 +331,7 @@ Instructions:
 # Generate the chat interface, including the tools
 llm = HuggingFaceEndpoint(
-    repo_id="deepseek-ai/DeepSeek-R1",
     # repo_id="Qwen/Qwen2.5-32B-Instruct",
     huggingfacehub_api_token=HF_ACCESS_KEY,
     # model_kwargs={'prompt': system_prompt}

 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain.agents import initialize_agent, AgentType
 from bs4 import BeautifulSoup
+import base64
 ## # Load environment variables from .env file
 # --- Constants ---
     except Exception as e:
         return f"error: {e}"
+# --- TOOL 13: Audio to Text Transcription Tool ---
+@tool
+def audio_to_text(audio_url: str) -> str:
+    """
+    Transcribe speech from an audio file URL to text using Hugging Face's Whisper model.
+    Input: A direct link to an audio file (e.g., .mp3, .wav).
+    Output: The transcribed text.
+    """
+    api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+    headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
+    try:
+        # Download the audio file
+        audio_resp = requests.get(audio_url, timeout=30)
+        audio_resp.raise_for_status()
+        audio_bytes = audio_resp.content
+        # Encode audio as base64 for API
+        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+        payload = {
+            "inputs": audio_b64,
+            "parameters": {"return_timestamps": False}
+        }
+        resp = requests.post(api_url, headers=headers, json=payload, timeout=60)
+        resp.raise_for_status()
+        data = resp.json()
+        return data.get("text", "no_answer")
+    except Exception as e:
+        return f"error: {e}"
+# --- TOOL 14: Python Code Executor Tool ---
+@tool
+def python_executor(code: str) -> str:
+    """
+    Safely execute simple Python code and return the result.
+    Only supports expressions and basic statements (no imports, file I/O, or system access).
+    """
+    try:
+        # Restrict built-ins for safety
+        allowed_builtins = {"abs": abs, "min": min, "max": max, "sum": sum, "len": len, "range": range}
+        # Only allow expressions, not statements
+        result = eval(code, {"__builtins__": allowed_builtins}, {})
+        return str(result)
+    except Exception as e:
+        return f"error: {e}"
 ##-- Tool Discovery ---
 # Use @tool for each function.
 # Use get_all_tools() to auto-discover all decorated tools.
     image_caption,
     ocr_image,
     classify_image,
+    web_scrape_tool,
+    audio_to_text,
+    python_executor
 ]
 tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
 # Generate the chat interface, including the tools
 llm = HuggingFaceEndpoint(
+    repo_id="microsoft/Phi-4-multimodal-instruct",
     # repo_id="Qwen/Qwen2.5-32B-Instruct",
     huggingfacehub_api_token=HF_ACCESS_KEY,
     # model_kwargs={'prompt': system_prompt}