import os
from functools import lru_cache

import gradio as gr
from langdetect import detect
from transformers import pipeline

_ASR_MODEL = "openai/whisper-large"
_SUMMARY_MODEL = "facebook/bart-large-cnn"
# Display names for the language codes the UI spells out; any other code is
# shown as-is.
_LANG_NAMES = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}


@lru_cache(maxsize=None)
def _get_pipeline(task, model):
    """Build a Hugging Face pipeline once and reuse it for later requests.

    Loading the model is by far the most expensive step, so the instance is
    cached per (task, model). A failed load raises and is not cached, so the
    next request simply retries.
    """
    return pipeline(task, model=model)


def process_audio(audio_file):
    """Transcribe an audio file, translate it to English and summarize it.

    Args:
        audio_file: Path to the audio file as a string, or an object exposing
            the path through a ``.name`` attribute.

    Returns:
        A 4-tuple of strings ``(language, transcript, english_transcript,
        summary)``. If transcription fails, the first element carries the
        error message and the other three are empty. Translation and
        summarization failures are reported in their own slots.
    """
    try:
        if audio_file is None:
            raise ValueError("no audio file provided")
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
        asr = _get_pipeline("automatic-speech-recognition", _ASR_MODEL)
        transcript = asr(audio_path)["text"]
    except Exception as e:
        return "Error in transcription: " + str(e), "", "", ""

    # Language detection is best-effort: any failure (e.g. an empty
    # transcript) falls back to "unknown" instead of aborting the request.
    try:
        detected_lang = detect(transcript)
    except Exception:
        detected_lang = "unknown"
    lang_text = _LANG_NAMES.get(detected_lang, detected_lang)

    transcript_en = transcript
    translation_failed = False
    if detected_lang != "en":
        try:
            # Whisper translates when the generation task is "translate".
            # It has to go through generate_kwargs: passing task= to
            # pipeline() collides with the positional task argument.
            translated = asr(audio_path, generate_kwargs={"task": "translate"})
            transcript_en = translated["text"]
        except Exception as e:
            transcript_en = f"Error translating: {e}"
            translation_failed = True

    if translation_failed:
        # Do not feed the error message itself to the summarizer.
        summary_text = "Error summarizing: no English transcript available"
    elif not transcript_en.strip():
        # Nothing to summarize.
        summary_text = ""
    else:
        try:
            summarizer = _get_pipeline("summarization", _SUMMARY_MODEL)
            summary = summarizer(
                transcript_en,
                max_length=100,
                min_length=30,
                do_sample=False,
                # Clip transcripts longer than the model's input limit
                # instead of failing on them.
                truncation=True,
            )
            summary_text = summary[0]["summary_text"]
        except Exception as e:
            summary_text = f"Error summarizing: {e}"

    # Optionally, remove uploaded file if it's saved on disk
    return lang_text, transcript, transcript_en, summary_text


with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcript, Translation & Summary (Powered by Whisper + Hugging Face)")
    audio_input = gr.Audio(source="upload", type="filepath", label="Upload MP3/WAV Audio")
    btn = gr