Final_Assignment_Project

Sleeping

App Files Community

wt002 committed on May 5

Commit

206048d

verified ·

1 Parent(s): 391c163

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -11

app.py CHANGED Viewed

@@ -2,9 +2,10 @@ import os
 import gradio as gr
 import requests
 import json
 from typing import List, Dict, Union
-import speech_recognition as sr
 from pydub import AudioSegment
 import wikipediaapi
 import pandas as pd
@@ -19,18 +20,9 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self, ollama_base_url: str = "http://localhost:11434"):
-        """
-        Pure Python agent with:
-        - Local LLM via Ollama
-        - Web search (SearxNG)
-        - Wikipedia access
-        - Document processing
-        - Speech-to-text
-        """
         self.ollama_url = f"{ollama_base_url}/api/generate"
-        self.searx_url = "https://searx.space/search"  # Public Searx instance
         self.wiki = wikipediaapi.Wikipedia('en')
-        self.recognizer = sr.Recognizer()
         print("BasicAgent initialized.")
@@ -40,8 +32,69 @@ class BasicAgent:
         print(f"Agent returning answer: {fixed_answer}")
         return fixed_answer
     def call_llm(self, prompt: str, model: str = "llama3") -> str:
         """Call local Ollama LLM"""

 import gradio as gr
 import requests
+import os
+import requests
 import json
 from typing import List, Dict, Union
 from pydub import AudioSegment
 import wikipediaapi
 import pandas as pd
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self, ollama_base_url: str = "http://localhost:11434"):
         self.ollama_url = f"{ollama_base_url}/api/generate"
+        self.searx_url = "https://searx.space/search"
         self.wiki = wikipediaapi.Wikipedia('en')
         print("BasicAgent initialized.")
         print(f"Agent returning answer: {fixed_answer}")
         return fixed_answer
+        # Initialize Vosk if available
+        self.vosk_model = None
+        try:
+            from vosk import Model, KaldiRecognizer
+            model_path = "vosk-model-small-en-us-0.15"
+            if os.path.exists(model_path):
+                self.vosk_model = Model(model_path)
+        except ImportError:
+            pass
+    def transcribe_audio(self, audio_path: str) -> str:
+        """Speech-to-text using Vosk or basic audio processing"""
+        # Convert to WAV if needed
+        if not audio_path.endswith('.wav'):
+            try:
+                sound = AudioSegment.from_file(audio_path)
+                audio_path = "temp.wav"
+                sound.export(audio_path, format="wav")
+            except:
+                return "Audio conversion failed"
+        # Try Vosk first if available
+        if self.vosk_model:
+            try:
+                from vosk import KaldiRecognizer
+                import wave
+                wf = wave.open(audio_path, "rb")
+                rec = KaldiRecognizer(self.vosk_model, wf.getframerate())
+                results = []
+                while True:
+                    data = wf.readframes(4000)
+                    if len(data) == 0:
+                        break
+                    if rec.AcceptWaveform(data):
+                        results.append(json.loads(rec.Result()))
+                final = json.loads(rec.FinalResult())
+                if final['text']:
+                    results.append(final)
+                return " ".join([r['text'] for r in results if 'text' in r])
+            except Exception as e:
+                return f"Vosk Error: {str(e)}"
+        # Fallback: Return audio metadata
+        try:
+            sound = AudioSegment.from_file(audio_path)
+            return f"Audio file: {sound.duration_seconds} seconds, {sound.channels} channels"
+        except:
+            return "Audio processing failed"
+    def transcribe_audio(self, audio_path: str) -> str:
+        """Speech-to-text using Vosk or basic audio processing"""
+        # Convert to WAV if needed
+        if not audio_path.endswith('.wav'):
+            try:
+                sound = AudioSegment.from_file(audio_path)
+                audio_path = "temp.wav"
+                sound.export(audio_path, format="wav")
+            except:
+                return "Audio conversion failed"
     def call_llm(self, prompt: str, model: str = "llama3") -> str:
         """Call local Ollama LLM"""