Spaces:
Paused
Paused
import gradio as gr | |
import requests | |
import subprocess | |
from loguru import logger | |
# Send DEBUG-and-above records to app.log; the file is rotated at 500 MB.
logger.add(
    "app.log",
    level="DEBUG",
    rotation="500 MB",
)

# Hugging Face inference endpoint that the audio bytes are POSTed to.
API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
def check_ffmpeg() -> None:
    """Verify that an ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` and logs the outcome.

    Raises:
        gr.Error: If the executable cannot be found or exits with a
            non-zero status. The underlying exception is attached as
            ``__cause__`` so the traceback shows why the check failed.
    """
    try:
        # check=True converts a non-zero exit status into CalledProcessError;
        # capture_output keeps ffmpeg's version banner off the console.
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        logger.error(f"ffmpeg check failed: {str(e)}")
        raise gr.Error("ffmpeg is not installed. Please install ffmpeg to use this application.") from e
    else:
        logger.info("ffmpeg check passed successfully")


# Run the check once at import time so a missing ffmpeg aborts start-up.
check_ffmpeg()
# (connect, read) timeouts in seconds for the endpoint request. Without a
# timeout, requests waits forever on a stalled connection.
# NOTE(review): the read timeout is a guess for long-form audio; tune it to
# the endpoint's real latency.
_REQUEST_TIMEOUT = (10, 300)


def _format_chunks(result):
    """Return the list of ``{"text", "timestamp"}`` chunks for *result*.

    When the response has no ``"chunks"`` key, the whole text is returned as a
    single chunk starting at 0.0 with an open end. Chunks whose start or end
    timestamp is ``None``, or that cannot be read at all, are logged and
    skipped.
    """
    if "chunks" not in result:
        logger.info("No chunks found, using single chunk")
        return [{
            "text": result["text"],
            "timestamp": [0.0, None]
        }]

    formatted = []
    logger.info(f"Processing {len(result['chunks'])} chunks")
    for i, chunk in enumerate(result["chunks"]):
        logger.debug(f"Processing chunk {i}: {chunk}")
        try:
            timestamp = chunk.get("timestamp", [None, None])
            start_time = timestamp[0]
            end_time = timestamp[1]
            text = chunk.get("text", "").strip()
        except Exception as chunk_error:
            # Best effort: one malformed chunk must not discard the rest.
            logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
            continue

        if start_time is not None and end_time is not None:
            formatted.append({
                "text": text,
                "timestamp": [start_time, end_time]
            })
        else:
            logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
    return formatted


def transcribe(inputs):
    """Send an audio file to the transcription endpoint and format the reply.

    Args:
        inputs: Path of the audio file to transcribe, or ``None`` when nothing
            was submitted.

    Returns:
        A dict with ``"text"`` (the full transcription) and ``"chunks"``
        (a list of ``{"text": str, "timestamp": [start, end]}`` entries).

    Raises:
        gr.Error: If no file was submitted, the API reports an error, the
            response has no ``"text"`` field, or reading the file or calling
            the API fails (including a request timeout).
    """
    if inputs is None:
        logger.warning("No audio file submitted")
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # NOTE(review): the body is always labelled audio/flac, whatever format the
    # file actually is -- confirm the endpoint detects the real format.
    headers = {
        "Accept": "application/json",
        "Content-Type": "audio/flac"
    }
    logger.debug(f"Using headers: {headers}")

    try:
        logger.info(f"Reading audio file: {inputs}")
        with open(inputs, "rb") as f:
            data = f.read()
        logger.debug(f"Audio file size: {len(data)} bytes")

        # Ask the endpoint for per-chunk timestamps.
        params = {
            "return_timestamps": True
        }
        logger.debug(f"Request parameters: {params}")

        logger.info("Sending request to API")
        response = requests.post(
            API_URL,
            headers=headers,
            data=data,
            params=params,
            timeout=_REQUEST_TIMEOUT,
        )
        logger.debug(f"API Response status: {response.status_code}")

        result = response.json()
        logger.debug(f"API Response: {result}")

        if "error" in result:
            logger.error(f"API returned error: {result['error']}")
            raise gr.Error(f"API Error: {result['error']}")

        if "text" not in result:
            logger.error("No transcription text in response")
            raise gr.Error("No transcription text in response")

        formatted_result = {
            "text": result["text"],
            "chunks": _format_chunks(result)
        }

        logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
        return formatted_result

    except gr.Error:
        # Already logged and phrased for the user above; re-raise untouched so
        # it is not wrapped in a second "Failed to transcribe audio" message.
        raise
    except Exception as e:
        logger.exception(f"Error during transcription: {str(e)}")
        raise gr.Error(f"Failed to transcribe audio: {str(e)}") from e
# Text and flagging choices shared by both tabs.
_DESCRIPTION = "Transcribe long-form microphone or audio inputs with the click of a button! "
_FLAGGING_OPTIONS = ("Incorrect text", "Incorrect timestamp", "Other issue")


def _build_interface(audio_input, title):
    """Return a transcription ``gr.Interface`` fed by *audio_input*.

    Both tabs share everything except the audio component and the title.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[gr.JSON(label="Transcription", open=True)],
        title=title,
        description=_DESCRIPTION,
        flagging_mode="manual",
        # Fresh list per interface, as in two independent literal lists.
        flagging_options=list(_FLAGGING_OPTIONS),
        flagging_dir="flagged_data",
    )


demo = gr.Blocks(theme=gr.themes.Ocean())

# Tab 1: record from the microphone.
mf_transcribe = _build_interface(
    gr.Audio(sources="microphone", type="filepath"),
    "Whisper Large V3 Turbo: Transcribe Audio",
)

# Tab 2: upload an existing audio file.
file_transcribe = _build_interface(
    gr.Audio(sources="upload", type="filepath", label="Audio file"),
    "Whisper Large V3: Transcribe Audio",
)

with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe],
        ["Microphone", "Audio file"],
    )

logger.info("Starting Gradio interface")
demo.queue().launch(ssr_mode=False)