Spaces:
Paused
Paused
File size: 5,038 Bytes
6c226f9 1e5e969 daf7f7b 6c226f9 7c39690 1e5e969 7c39690 6c226f9 daf7f7b 7c39690 daf7f7b 478eee2 3c0cd8e 7c39690 3c0cd8e 6c226f9 1e5e969 7c39690 1e5e969 7c39690 1e5e969 7c39690 7c6ce6c 7c39690 7c6ce6c 7c39690 7c6ce6c 7c39690 1e5e969 7c39690 1e5e969 7c6ce6c 7c39690 7c6ce6c 7c39690 7c6ce6c 0147bd5 7c6ce6c 7c39690 0147bd5 7c39690 7c6ce6c 0147bd5 087dbfe 1e5e969 0147bd5 1e5e969 7c39690 1e5e969 6c226f9 47407ef 6c226f9 3ce82e9 3c0cd8e 478eee2 0147bd5 478eee2 17f14b2 3c0cd8e 1e5e969 3c0cd8e 7c6ce6c 087dbfe 3c0cd8e 3ce82e9 6c226f9 478eee2 0147bd5 478eee2 a5bfe25 6c226f9 087dbfe 6c226f9 7c6ce6c 087dbfe 6c226f9 cc96a73 6c226f9 7c39690 47407ef 7097513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
import requests
import subprocess
from loguru import logger
# Send loguru output to app.log at DEBUG level and above; the file is rotated
# once it reaches 500 MB.
logger.add("app.log", rotation="500 MB", level="DEBUG")
# Endpoint that transcribe() POSTs the raw audio bytes to.
API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
def check_ffmpeg():
    """Verify that the ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` with its output captured and logs the outcome.

    Raises:
        gr.Error: If the executable cannot be found or exits with a
            non-zero status.
    """
    version_cmd = ['ffmpeg', '-version']
    try:
        subprocess.run(version_cmd, capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        logger.error(f"ffmpeg check failed: {str(e)}")
        raise gr.Error("ffmpeg is not installed. Please install ffmpeg to use this application.")
    else:
        logger.info("ffmpeg check passed successfully")
# Run the ffmpeg check once at import time so a missing binary aborts startup
# (check_ffmpeg raises) instead of surfacing later.
check_ffmpeg()
# (connect, read) timeouts in seconds for the transcription request. The read
# timeout is generous because the endpoint answers only after it has processed
# the whole upload; without any timeout a stalled endpoint would block forever.
_REQUEST_TIMEOUT = (10, 300)


def _format_chunks(raw_chunks):
    """Return the well-formed entries of *raw_chunks* in output format.

    Each kept entry becomes ``{"text": <stripped text>, "timestamp": [start, end]}``.
    Entries whose start or end timestamp is ``None`` are skipped with a
    warning; entries that are structurally malformed are skipped with an
    error log. Neither case aborts the remaining chunks.
    """
    formatted = []
    for i, chunk in enumerate(raw_chunks):
        logger.debug(f"Processing chunk {i}: {chunk}")
        try:
            stamp = chunk.get("timestamp", [None, None])
            start_time, end_time = stamp[0], stamp[1]
            text = chunk.get("text", "").strip()
        except (AttributeError, IndexError, KeyError, TypeError) as chunk_error:
            # These are the failures JSON-decoded data can produce here: a
            # chunk that is not a dict, a timestamp that is missing entries or
            # not indexable, or a text value that is not a string.
            logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
            continue
        if start_time is None or end_time is None:
            logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
            continue
        formatted.append({
            "text": text,
            "timestamp": [start_time, end_time],
        })
    return formatted


def transcribe(inputs):
    """Send an audio file to the transcription endpoint and format the reply.

    Args:
        inputs: Path of the audio file to transcribe, or ``None`` when the
            user submitted nothing.

    Returns:
        dict: ``{"text": <full transcription>, "chunks": [...]}`` where each
        chunk is ``{"text": str, "timestamp": [start, end]}``. When the
        response carries no ``"chunks"`` key, a single chunk holding the whole
        text with timestamp ``[0.0, None]`` is returned.

    Raises:
        gr.Error: If no file was submitted, the endpoint reports an error,
            the response is not JSON or lacks ``"text"``, or any other
            failure occurs while reading the file or calling the endpoint.
    """
    if inputs is None:
        logger.warning("No audio file submitted")
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # NOTE(review): Content-Type is always audio/flac, whatever format the
    # file actually has -- confirm the endpoint decodes by content, not header.
    headers = {
        "Accept": "application/json",
        "Content-Type": "audio/flac"
    }
    logger.debug(f"Using headers: {headers}")

    try:
        logger.info(f"Reading audio file: {inputs}")
        with open(inputs, "rb") as f:
            data = f.read()
        logger.debug(f"Audio file size: {len(data)} bytes")

        # Add parameters to request
        params = {
            "return_timestamps": True
        }
        logger.debug(f"Request parameters: {params}")

        logger.info("Sending request to API")
        response = requests.post(
            API_URL,
            headers=headers,
            data=data,
            params=params,
            timeout=_REQUEST_TIMEOUT,
        )
        logger.debug(f"API Response status: {response.status_code}")

        try:
            result = response.json()
        except ValueError as json_error:
            # A non-JSON body (e.g. an HTML error page) would otherwise reach
            # the user as a raw JSON decoding message.
            logger.error(f"API returned a non-JSON response (HTTP {response.status_code})")
            raise gr.Error(
                f"API returned a non-JSON response (HTTP {response.status_code})"
            ) from json_error
        logger.debug(f"API Response: {result}")

        if "error" in result:
            logger.error(f"API returned error: {result['error']}")
            raise gr.Error(f"API Error: {result['error']}")

        if "text" not in result:
            logger.error("No transcription text in response")
            raise gr.Error("No transcription text in response")

        # Format response as JSON
        formatted_result = {
            "text": result["text"],
            "chunks": []
        }

        if "chunks" in result:
            logger.info(f"Processing {len(result['chunks'])} chunks")
            formatted_result["chunks"] = _format_chunks(result["chunks"])
        else:
            logger.info("No chunks found, using single chunk")
            formatted_result["chunks"].append({
                "text": result["text"],
                "timestamp": [0.0, None]
            })

        logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
        return formatted_result

    except gr.Error:
        # Already a user-facing error raised above; let it through unchanged
        # instead of re-wrapping it as "Failed to transcribe audio: ...".
        raise
    except Exception as e:
        logger.exception(f"Error during transcription: {str(e)}")
        raise gr.Error(f"Failed to transcribe audio: {str(e)}") from e
# Text shown under the title of both tabs.
_DESCRIPTION = (
    "Transcribe long-form microphone or audio inputs with the click of a button! "
)
# Reasons a user can pick when flagging a result; copied into a fresh list for
# each interface so the two never share one mutable object.
_FLAG_CHOICES = (
    "Incorrect text",
    "Incorrect timestamp",
    "Other issue",
)


def _build_interface(audio_input, title):
    """Create a transcription ``gr.Interface`` for one audio input component.

    Both tabs share everything except the audio component and the title, so
    only those two vary here.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[gr.JSON(label="Transcription", open=True)],
        title=title,
        description=_DESCRIPTION,
        flagging_mode="manual",
        flagging_options=list(_FLAG_CHOICES),
        flagging_dir="flagged_data",
    )


demo = gr.Blocks(theme=gr.themes.Ocean())

# Tab 1: record from the microphone.
mf_transcribe = _build_interface(
    gr.Audio(sources="microphone", type="filepath"),
    "Whisper Large V3 Turbo: Transcribe Audio",
)

# Tab 2: upload an existing audio file.
file_transcribe = _build_interface(
    gr.Audio(sources="upload", type="filepath", label="Audio file"),
    "Whisper Large V3: Transcribe Audio",
)

with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

logger.info("Starting Gradio interface")
demo.queue().launch(ssr_mode=False)
|