wt002 committed on
Commit
206048d
·
verified ·
1 Parent(s): 391c163

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -11
app.py CHANGED
@@ -2,9 +2,10 @@ import os
2
  import gradio as gr
3
  import requests
4
 
 
 
5
  import json
6
  from typing import List, Dict, Union
7
- import speech_recognition as sr
8
  from pydub import AudioSegment
9
  import wikipediaapi
10
  import pandas as pd
@@ -19,18 +20,9 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
  # --- Basic Agent Definition ---
20
  class BasicAgent:
21
  def __init__(self, ollama_base_url: str = "http://localhost:11434"):
22
- """
23
- Pure Python agent with:
24
- - Local LLM via Ollama
25
- - Web search (SearxNG)
26
- - Wikipedia access
27
- - Document processing
28
- - Speech-to-text
29
- """
30
  self.ollama_url = f"{ollama_base_url}/api/generate"
31
- self.searx_url = "https://searx.space/search" # Public Searx instance
32
  self.wiki = wikipediaapi.Wikipedia('en')
33
- self.recognizer = sr.Recognizer()
34
 
35
  print("BasicAgent initialized.")
36
 
@@ -40,8 +32,69 @@ class BasicAgent:
40
  print(f"Agent returning answer: {fixed_answer}")
41
  return fixed_answer
42
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
 
45
 
46
  def call_llm(self, prompt: str, model: str = "llama3") -> str:
47
  """Call local Ollama LLM"""
 
2
  import gradio as gr
3
  import requests
4
 
5
+ import os
6
+ import requests
7
  import json
8
  from typing import List, Dict, Union
 
9
  from pydub import AudioSegment
10
  import wikipediaapi
11
  import pandas as pd
 
20
  # --- Basic Agent Definition ---
21
  class BasicAgent:
22
  def __init__(self, ollama_base_url: str = "http://localhost:11434"):
 
 
 
 
 
 
 
 
23
  self.ollama_url = f"{ollama_base_url}/api/generate"
24
+ self.searx_url = "https://searx.space/search"
25
  self.wiki = wikipediaapi.Wikipedia('en')
 
26
 
27
  print("BasicAgent initialized.")
28
 
 
32
  print(f"Agent returning answer: {fixed_answer}")
33
  return fixed_answer
34
 
35
+ # Initialize Vosk if available
36
+ self.vosk_model = None
37
+ try:
38
+ from vosk import Model, KaldiRecognizer
39
+ model_path = "vosk-model-small-en-us-0.15"
40
+ if os.path.exists(model_path):
41
+ self.vosk_model = Model(model_path)
42
+ except ImportError:
43
+ pass
44
 
45
+ def transcribe_audio(self, audio_path: str) -> str:
46
+ """Speech-to-text using Vosk or basic audio processing"""
47
+ # Convert to WAV if needed
48
+ if not audio_path.endswith('.wav'):
49
+ try:
50
+ sound = AudioSegment.from_file(audio_path)
51
+ audio_path = "temp.wav"
52
+ sound.export(audio_path, format="wav")
53
+ except:
54
+ return "Audio conversion failed"
55
+
56
+ # Try Vosk first if available
57
+ if self.vosk_model:
58
+ try:
59
+ from vosk import KaldiRecognizer
60
+ import wave
61
+ wf = wave.open(audio_path, "rb")
62
+ rec = KaldiRecognizer(self.vosk_model, wf.getframerate())
63
+
64
+ results = []
65
+ while True:
66
+ data = wf.readframes(4000)
67
+ if len(data) == 0:
68
+ break
69
+ if rec.AcceptWaveform(data):
70
+ results.append(json.loads(rec.Result()))
71
+
72
+ final = json.loads(rec.FinalResult())
73
+ if final['text']:
74
+ results.append(final)
75
+ return " ".join([r['text'] for r in results if 'text' in r])
76
+ except Exception as e:
77
+ return f"Vosk Error: {str(e)}"
78
+
79
+ # Fallback: Return audio metadata
80
+ try:
81
+ sound = AudioSegment.from_file(audio_path)
82
+ return f"Audio file: {sound.duration_seconds} seconds, {sound.channels} channels"
83
+ except:
84
+ return "Audio processing failed"
85
+
86
+ def transcribe_audio(self, audio_path: str) -> str:
87
+ """Speech-to-text using Vosk or basic audio processing"""
88
+ # Convert to WAV if needed
89
+ if not audio_path.endswith('.wav'):
90
+ try:
91
+ sound = AudioSegment.from_file(audio_path)
92
+ audio_path = "temp.wav"
93
+ sound.export(audio_path, format="wav")
94
+ except:
95
+ return "Audio conversion failed"
96
 
97
+
98
 
99
  def call_llm(self, prompt: str, model: str = "llama3") -> str:
100
  """Call local Ollama LLM"""