# agent_course_final / audio_analyzer.py
# Last commit by George Sergia: "Add new tools. Fix bugs" (b11304e)
# File size: 1.33 kB
from typing import Optional
from google import genai
from google.genai import types
import requests
import os
# (connect, read) timeouts in seconds for the audio download. Without a
# timeout, requests waits forever on an unresponsive host.
_AUDIO_DOWNLOAD_TIMEOUT = (10, 60)

# File extensions mapped to the MIME type sent to Gemini. Anything not listed
# falls back to "audio/mpeg", the value this tool previously used for every file.
_AUDIO_MIME_BY_EXTENSION = {
    ".wav": "audio/wav",
    ".aif": "audio/aiff",
    ".aiff": "audio/aiff",
    ".aac": "audio/aac",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
}


def _guess_audio_mime_type(audio_url: str) -> str:
    """Return the MIME type implied by the URL's file extension (default "audio/mpeg")."""
    # Drop any query string / fragment so "clip.wav?token=abc" still resolves to ".wav".
    path = audio_url.split("?", 1)[0].split("#", 1)[0]
    extension = os.path.splitext(path)[1].lower()
    return _AUDIO_MIME_BY_EXTENSION.get(extension, "audio/mpeg")


def analyze_audio(audio_url: str, analysis_prompt: Optional[str] = None) -> str:
    """
    Download an audio file and analyze it with a Google Gemini model.

    With no prompt the model is asked for a detailed transcription; a custom
    prompt can direct it to any other kind of analysis of the audio.

    Args:
        audio_url (str): URL of the audio file to analyze.
        analysis_prompt (Optional[str]): Optional prompt for a specific analysis
            focus. When omitted or empty, a transcription prompt is used.

    Returns:
        str: Text of the model's response.

        On any failure (invalid URL, download error, HTTP error status, empty
        download, Gemini API error) the function does not raise; it returns a
        dict of the form ``{"error": "Error analyzing audio: <details>"}``.
        NOTE(review): that error value is a dict although the annotation says
        ``str``; it is kept as-is so existing callers keep working.
    """
    try:
        # Fail fast with a clear message instead of an obscure requests error.
        if not isinstance(audio_url, str) or not audio_url.strip():
            raise ValueError("audio_url must be a non-empty string")

        # Initialize Google Gen AI client
        gemini_llm = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
        print(f"Analyzing audio from URL {audio_url}")

        text = analysis_prompt or "Provide a detailed transcription of this audio."

        download = requests.get(audio_url, timeout=_AUDIO_DOWNLOAD_TIMEOUT)
        # Without this check a 404/500 error page would be sent to Gemini as "audio".
        download.raise_for_status()
        audio_bytes = download.content
        if not audio_bytes:
            raise ValueError("downloaded audio file is empty")

        audio = types.Part.from_bytes(
            data=audio_bytes,
            mime_type=_guess_audio_mime_type(audio_url),
        )

        # Get response from the Gemini 2.0 Flash model
        response = gemini_llm.models.generate_content(
            model="gemini-2.0-flash",
            contents=[text, audio],
        )
        print(response.text)
        return response.text
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"error": f"Error analyzing audio: {str(e)}"}