Spaces:

Aptheos
/

wallE

Sleeping

File size: 2,567 Bytes

0685bfc
8d3a981
0685bfc
 
8d3a981
a667aa2
c27abe9
 
 
8d3a981
0685bfc
8d3a981
 
 
 
 
 
 
 
 
 
c27abe9
0685bfc
 
 
 
8d3a981
 
 
c27abe9
8d3a981
 
 
c27abe9
 
 
0685bfc
 
c27abe9
 
 
 
 
 
 
8d3a981
 
 
c27abe9
 
 
 
 
8d3a981
 
c27abe9
8d3a981
 
 
 
 
 
 
c27abe9
8d3a981
 
 
c27abe9
 
 
8d3a981
0685bfc
 
8d3a981
 
c27abe9
 
 
0685bfc
8d3a981
 
c27abe9
 
 
8d3a981
 
0685bfc

import os
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient

# ----------------------
# AUDIO-TO-TEXT SETUP
# ----------------------
# Checkpoint used for speech recognition, and the batch size handed to the
# pipeline on each transcription call.
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8

# Use device index 0 when torch reports CUDA as available; otherwise run on CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Built once at module import; transcribe() calls this same object every time.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

def transcribe(audio_input):
    """Transcribe an audio input to text with the module-level ASR pipeline.

    Args:
        audio_input: Audio handed unchanged to ``pipe``. The Gradio interface
            in this file supplies it as a file path.

    Returns:
        The ``"text"`` entry of the pipeline's output.

    Raises:
        gr.Error: If ``audio_input`` is ``None``.
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # Options forwarded to the pipeline call alongside the audio itself.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]

# ----------------------
# TEXT ORGANIZATION SETUP
# ----------------------
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# The access token comes from the HF_TOKEN environment variable (set it as a
# Space secret). Stop at startup with a clear message if it is missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("HF_TOKEN not found! Add it as a secret in your Space settings.")

# Point the client explicitly at the Hugging Face inference API endpoint.
client = InferenceClient(
    base_url="https://api-inference.huggingface.co",
    token=hf_token,
)

def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text; it is interpolated into the
            user prompt after the ``Transcript:`` line.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        message first, then the user message carrying the prompt.
    """
    # The prompt deliberately keeps its leading newline, the four-space
    # indentation on every line, and the trailing four spaces, so the text
    # sent to the model is unchanged.
    prompt_lines = (
        "",
        "    Take this raw meeting transcript and return an organized, sectioned version.",
        "    You may include a summary at the top.",
        "",
        "    Transcript:",
        f"    {meeting_transcript}",
        "    ",
    )
    system_message = {
        "role": "system",
        "content": "You are an assistant that organizes meeting minutes.",
    }
    user_message = {"role": "user", "content": "\n".join(prompt_lines)}
    return [system_message, user_message]

def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a raw transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the prompt.

    Returns:
        The message content of the first choice in the chat-completion
        response.
    """
    completion = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        max_tokens=300,
        seed=42,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# ----------------------
# COMBINED TOOL
# ----------------------
def meeting_transcript_tool(audio_input):
    """Transcribe the audio, then return the organized form of that text.

    Args:
        audio_input: Audio input forwarded to ``transcribe``.

    Returns:
        The result of ``organize_text`` applied to the transcription.
    """
    return organize_text(transcribe(audio_input))

# ----------------------
# GRADIO INTERFACE
# ----------------------
# Single-function UI: one audio input, delivered to the callback as a file
# path, and one copyable text box that shows the organized transcript.
demo = gr.Interface(
    title="🪶 Meeting Transcription Tool",
    description="Upload or record an audio file. This app transcribes it using Whisper and organizes the text using Phi-3",
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="Organized Transcript", show_copy_button=True),
)

# Start the app as soon as this module is executed.
demo.launch()