# Web file-viewer residue captured along with the source (not part of the program):
#   jaisun2004's picture
#   Update app.py
#   4698e24 verified
#   raw
#   history blame
#   3.21 kB
import gradio as gr
import openai
from langdetect import detect
from transformers import pipeline
import os
# The API key is read from the environment; set OPENAI_API_KEY in HF Space secrets.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Summarization pipeline, created once at import time and reused by process_audio.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
def make_str(val):
    """Coerce *val* to a string, never raising.

    ``None`` becomes the empty string, a list is rendered as its items
    (each converted recursively) joined by newlines, and every other value
    goes through ``str()``.  Any exception raised during conversion yields
    the empty string instead of propagating.
    """
    if val is None:
        return ""
    try:
        if isinstance(val, list):
            # One item per line; nested lists are flattened by the recursion.
            return "\n".join(map(make_str, val))
        # Scalars, dicts and arbitrary objects all use their str() form.
        return str(val)
    except Exception:
        return ""
# Display names for the language codes this app labels explicitly; any other
# detected code is shown as-is.
_LANGUAGE_NAMES = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}


def _whisper_text(endpoint, audio_path):
    """Send the audio file to a Whisper endpoint and return its stripped text."""
    with open(audio_path, "rb") as audio_file:
        response = endpoint.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
        )
    return make_str(response).strip()


def process_audio(audio_path):
    """Transcribe an audio file, detect its language, translate and summarize.

    Args:
        audio_path: Filesystem path of the uploaded audio file.

    Returns:
        A 4-tuple of strings
        ``(language, transcript, english_transcript, summary)``.
        Failures are reported as text inside the tuple rather than raised:
        a missing/invalid path or a transcription error puts the message in
        the first element and leaves the rest empty; a translation error puts
        the message in the third element and leaves the summary empty.
    """
    # Accept only a valid, non-empty file path (string).
    if not audio_path or not isinstance(audio_path, str):
        return ("No audio file provided.", "", "", "")

    try:
        transcript = _whisper_text(openai.audio.transcriptions, audio_path)
    except Exception as e:
        return (make_str(f"Error in transcription: {e}"), "", "", "")

    # Language detection can fail (e.g. on an empty transcript).  Bind
    # detected_lang in that case too, so the comparison below cannot hit an
    # unbound local.
    try:
        detected_lang = detect(transcript)
    except Exception:
        detected_lang = None
    if detected_lang is None:
        lang_text = "unknown"
    else:
        lang_text = _LANGUAGE_NAMES.get(detected_lang, detected_lang)

    # Translate unless the transcript is confirmed English.  An empty
    # transcript has nothing to translate, so the extra API call is skipped.
    transcript_en = transcript
    translation_ok = True
    if transcript and detected_lang != "en":
        try:
            transcript_en = _whisper_text(openai.audio.translations, audio_path)
        except Exception as e:
            transcript_en = make_str(f"Error translating: {e}")
            translation_ok = False

    # Only summarize real transcript text: never an error message or an
    # empty string.
    summary_text = ""
    if translation_ok and transcript_en:
        try:
            summary = summarizer(
                transcript_en,
                max_length=100,
                min_length=30,
                do_sample=False,
                # Long transcripts are cut to the model's input limit instead
                # of failing inside the pipeline.
                truncation=True,
            )
            # Make sure we always extract a string summary.
            if isinstance(summary, list) and len(summary) > 0 and "summary_text" in summary[0]:
                summary_text = make_str(summary[0]["summary_text"])
            else:
                summary_text = make_str(summary)
        except Exception as e:
            summary_text = make_str(f"Error summarizing: {e}")

    # Return only strings, never bool/None/dict/list.
    return (
        make_str(lang_text),
        make_str(transcript),
        make_str(transcript_en),
        make_str(summary_text),
    )
# Gradio UI: one audio upload in, four text boxes out, in the same order as
# the tuple returned by process_audio.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(label="Upload MP3/WAV Audio", type="filepath"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Detected Language",
            "Original Transcript",
            "English Transcript (if translated)",
            "Summary",
        )
    ],
    title="Audio Transcript, Translation & Summary (via OpenAI Whisper API)",
    description=(
        "Upload your audio file (MP3/WAV). "
        "This app transcribes via OpenAI Whisper API, "
        "detects language, translates to English if needed, and summarizes."
    ),
)

# Start the web server for the interface.
iface.launch()