# Hugging Face Space (status when this page was captured: Sleeping)
import logging
import os

import gradio as gr
import openai
from langdetect import detect
from transformers import pipeline
# The OpenAI key is read from the environment; os.getenv returns None when the
# variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")  # Set this in HF Space secrets

# Built once at import time and reused by process_audio for every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def make_str(val):
    """Coerce any value to a string suitable for a text output.

    Args:
        val: Any object.

    Returns:
        ``""`` for ``None``; for a list, each element converted recursively
        and joined with newlines; ``str(val)`` for everything else. If the
        conversion itself raises, ``""`` is returned instead.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, list):
            return "\n".join(make_str(item) for item in val)
        # bool, int, float, dict and every other type share the same
        # conversion, so no per-type branches are needed.
        return str(val)
    except Exception:
        # Deliberate best-effort: a value whose __str__ fails is shown as
        # empty text rather than breaking the caller.
        return ""
def process_audio(audio_path):
    """Transcribe an audio file, detect its language, translate and summarize.

    Args:
        audio_path: Path to the audio file as a non-empty string.

    Returns:
        A 4-tuple of strings
        ``(language, transcript, english_transcript, summary)``.
        When no usable path is given, or transcription fails, the first
        element carries the message and the other three are empty. When
        translation fails, the third element carries the error message and
        the summary is empty.
    """
    # Accept only a valid, non-empty file path (string).
    if not audio_path or not isinstance(audio_path, str):
        return ("No audio file provided.", "", "", "")

    log = logging.getLogger(__name__)

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        log.exception("Transcription failed")
        return (make_str(f"Error in transcription: {e}"), "", "", "")

    try:
        detected_lang = detect(transcript)
    except Exception:
        # detect() can raise (for example on text with nothing to analyse).
        # Bind detected_lang here so the comparison below cannot hit an
        # unbound local.
        log.warning("Language detection failed", exc_info=True)
        detected_lang = None
        lang_text = "unknown"
    else:
        lang_text = {"en": "English", "hi": "Hindi", "ta": "Tamil"}.get(
            detected_lang, detected_lang
        )

    transcript_en = transcript
    translated_ok = True
    # An undetected language is not known to be English, so it is translated
    # as well.
    if detected_lang != "en":
        try:
            with open(audio_path, "rb") as audio_file:
                transcript_en = openai.audio.translations.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            transcript_en = make_str(transcript_en).strip()
        except Exception as e:
            log.exception("Translation failed")
            transcript_en = make_str(f"Error translating: {e}")
            translated_ok = False

    summary_text = ""
    # Skip summarization when there is nothing real to summarize: either the
    # English text is empty or it only holds the translation error message.
    if translated_ok and transcript_en:
        try:
            summary = summarizer(
                transcript_en,
                max_length=100,
                min_length=30,
                do_sample=False,
                # Clip transcripts longer than the model's input limit
                # instead of letting the pipeline fail on them.
                truncation=True,
            )
            # Always extract a string summary, never bool/None/etc.
            if (
                isinstance(summary, list)
                and len(summary) > 0
                and "summary_text" in summary[0]
            ):
                summary_text = make_str(summary[0]["summary_text"])
            else:
                summary_text = make_str(summary)
        except Exception as e:
            log.exception("Summarization failed")
            summary_text = make_str(f"Error summarizing: {e}")

    # Return only strings, never bool/None/dict/list.
    return (
        make_str(lang_text),
        make_str(transcript),
        make_str(transcript_en),
        make_str(summary_text),
    )
# Gradio UI: one audio upload in, four text boxes out. The order of `outputs`
# matches the 4-tuple returned by process_audio.
iface = gr.Interface(
    fn=process_audio,
    # type="filepath" hands process_audio a path string rather than raw data.
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Original Transcript"),
        gr.Textbox(label="English Transcript (if translated)"),
        gr.Textbox(label="Summary"),
    ],
    title="Audio Transcript, Translation & Summary (via OpenAI Whisper API)",
    description="Upload your audio file (MP3/WAV). This app transcribes via OpenAI Whisper API, detects language, translates to English if needed, and summarizes.",
)

iface.launch()