File size: 2,567 Bytes
0685bfc 8d3a981 0685bfc 8d3a981 a667aa2 c27abe9 8d3a981 0685bfc 8d3a981 c27abe9 0685bfc 8d3a981 c27abe9 8d3a981 c27abe9 0685bfc c27abe9 8d3a981 c27abe9 8d3a981 c27abe9 8d3a981 c27abe9 8d3a981 c27abe9 8d3a981 0685bfc 8d3a981 c27abe9 0685bfc 8d3a981 c27abe9 8d3a981 0685bfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import os
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
# ----------------------
# AUDIO-TO-TEXT SETUP
# ----------------------
# Checkpoint loaded by the speech-recognition pipeline, and the batch size
# that transcribe() passes along on every call.
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8

# Select device index 0 when torch reports CUDA as available, else the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# The pipeline is built once at import time and shared by all requests.
# chunk_length_s=30 asks the pipeline to process audio in 30-second chunks.
pipe = pipeline(
    model=AUDIO_MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
def transcribe(audio_input):
    """Turn an audio input into plain text with the module-level ASR pipeline.

    Args:
        audio_input: The audio to transcribe, forwarded unchanged to ``pipe``.
            Presumably a file path, since the UI declares
            ``gr.Audio(type="filepath")`` -- verify against the caller.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # Options passed to the pipeline call; timestamps are requested even
    # though only the text portion of the result is returned.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
# ----------------------
# TEXT ORGANIZATION SETUP
# ----------------------
# Chat model used to organize the raw transcript.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# The access token comes from the HF_TOKEN environment variable (set it as a
# Space secret). Fail at import time when it is missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN not found! Add it as a secret in your Space settings.")

# Point the client explicitly at the HF inference API endpoint.
# NOTE(review): base_url is fixed here while organize_text() also passes
# model= on each call -- confirm with the installed huggingface_hub version
# that the request URL resolves to the intended model endpoint.
client = InferenceClient(
    base_url="https://api-inference.huggingface.co",
    token=hf_token,
)
def build_messages(meeting_transcript) -> list:
    """Build the two-message chat prompt that asks for organized minutes.

    Args:
        meeting_transcript: Raw transcript; it is interpolated into the user
            message with f-string formatting.

    Returns:
        A list of two dicts with ``"role"`` and ``"content"`` keys: first the
        system message, then the user message containing the instructions
        followed by the transcript.
    """
    # The empty first and last entries reproduce the leading and trailing
    # newline of the prompt once the lines are joined.
    prompt_lines = (
        "",
        "Take this raw meeting transcript and return an organized, sectioned version.",
        "You may include a summary at the top.",
        "Transcript:",
        f"{meeting_transcript}",
        "",
    )
    user_prompt = "\n".join(prompt_lines)

    conversation = (
        ("system", "You are an assistant that organizes meeting minutes."),
        ("user", user_prompt),
    )
    return [{"role": role, "content": text} for role, text in conversation]
def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a meeting transcript.

    Args:
        meeting_transcript: Raw transcript, passed to ``build_messages``.

    Returns:
        The message content of the first choice in the chat completion
        response.
    """
    # Generation is limited to 300 new tokens and uses a fixed seed.
    completion = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        max_tokens=300,
        seed=42,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# ----------------------
# COMBINED TOOL
# ----------------------
def meeting_transcript_tool(audio_input):
    """Transcribe an audio input and return the organized transcript.

    Chains ``transcribe`` and ``organize_text``: the audio is converted to
    text, and that text is then sent to the chat model for organization.

    Args:
        audio_input: The audio to process, forwarded to ``transcribe``.

    Returns:
        The organized text returned by ``organize_text``.
    """
    return organize_text(transcribe(audio_input))
# ----------------------
# GRADIO INTERFACE
# ----------------------
# Input component: audio is handed to the function as a file path.
_audio_component = gr.Audio(type="filepath")

# Output component: a text box with a copy button for the organized result.
_transcript_component = gr.Textbox(
    label="Organized Transcript",
    show_copy_button=True,
)

# Wire the combined transcribe-and-organize function into a simple interface.
demo = gr.Interface(
    title="🪶 Meeting Transcription Tool",
    description="Upload or record an audio file. This app transcribes it using Whisper and organizes the text using Phi-3",
    fn=meeting_transcript_tool,
    inputs=_audio_component,
    outputs=_transcript_component,
)

# Start the app as soon as the module is executed.
demo.launch()
|