# Source: Hugging Face Space "MicroHealth/AV-to-transcripts" (status: Paused)
# File size: 6,287 Bytes

import io
import os
import tempfile
import threading
import base64
from urllib.parse import urlparse

import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate

import requests
from pytube import YouTube
from pydub import AudioSegment
import google.generativeai as genai
from moviepy.editor import VideoFileClip

# Initialize the Dash app, styled with the Bootstrap theme from dash-bootstrap-components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Read the Google API key from the GOOGLE_API_KEY environment variable
# (per the original note, provided by Hugging Face Spaces) and hand it to the SDK.
# NOTE(review): os.environ.get returns None when the variable is unset, and that
# None is passed straight to genai.configure -- confirm the deployment always sets it.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level Gemini model instance; transcribe_audio sends its requests through it.
model = genai.GenerativeModel('gemini-2.0-flash-lite')

def is_valid_url(url):
    """Return True when *url* parses with both a scheme and a network location.

    Any ``ValueError`` raised while parsing (for example an unbalanced IPv6
    bracket in the host) is treated as "not a valid URL".
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        return False
    # Both components must be non-empty, e.g. "https" and "example.com".
    return bool(parsed.scheme and parsed.netloc)

def download_media(url):
    """Download the media behind *url* into a temporary file and return its path.

    YouTube links (``youtube.com`` / ``youtu.be``) are fetched with pytube as a
    progressive MP4. Any other URL is fetched over HTTP and streamed to disk;
    the file suffix is derived from the response ``Content-Type`` and, when
    that is not an audio/video type, from the extension in the URL path.

    The caller owns the returned file and is responsible for deleting it.

    Raises:
        ValueError: if the YouTube video offers no progressive MP4 stream.
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    # NOTE(security): *url* is user-supplied and fetched server-side. If this
    # app is exposed publicly, consider restricting the allowed hosts.
    if "youtube.com" in url or "youtu.be" in url:
        yt = YouTube(url)
        stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
        if stream is None:
            raise ValueError("No progressive MP4 stream is available for this YouTube URL")

        # Reserve a unique path and close our handle before pytube writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
            temp_path = temp_file.name
        try:
            stream.download(
                output_path=os.path.dirname(temp_path),
                filename=os.path.basename(temp_path),
            )
        except Exception:
            # Do not leave an empty/partial temp file behind on failure.
            os.unlink(temp_path)
            raise
        return temp_path

    # (connect timeout, read timeout) in seconds, so a dead server cannot hang the worker.
    request_timeout = (10, 60)
    chunk_size = 1024 * 1024

    with requests.get(url, stream=True, timeout=request_timeout) as response:
        # Fail loudly on 4xx/5xx instead of saving an error page as "media".
        response.raise_for_status()

        content_type = response.headers.get('content-type', '')
        if 'video' in content_type:
            suffix = '.mp4'
        elif 'audio' in content_type:
            suffix = '.mp3'
        else:
            # Generic content types (e.g. application/octet-stream): fall back
            # to the extension in the URL path, which may be empty.
            suffix = os.path.splitext(urlparse(url).path)[1]

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with temp_file:
                # Stream to disk so large media is never held fully in memory.
                for chunk in response.iter_content(chunk_size=chunk_size):
                    temp_file.write(chunk)
        except Exception:
            os.unlink(temp_file.name)
            raise
        return temp_file.name

def extract_audio(file_path):
    """Extract the audio track of the video at *file_path* into a temporary WAV file.

    Returns the path of the WAV file; the caller is responsible for deleting it.

    Raises:
        ValueError: if the video has no audio track.
    """
    video = VideoFileClip(file_path)
    audio = None
    try:
        audio = video.audio
        if audio is None:
            # Silent videos expose audio=None; fail with a clear message
            # rather than an AttributeError on None.
            raise ValueError("The video file does not contain an audio track")

        # Only the unique name is needed; close the handle immediately so no
        # file descriptor leaks while moviepy writes to the path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio_file:
            audio_path = audio_file.name
        try:
            audio.write_audiofile(audio_path)
        except Exception:
            # Remove the partial output if the export fails.
            os.unlink(audio_path)
            raise
        return audio_path
    finally:
        # Always release the underlying readers, even on failure.
        video.close()
        if audio is not None:
            audio.close()

# MIME types for common audio extensions, spelled the way the Gemini audio
# documentation lists them.
_GEMINI_AUDIO_MIME_TYPES = {
    '.wav': 'audio/wav',
    '.mp3': 'audio/mp3',
    '.aiff': 'audio/aiff',
    '.aac': 'audio/aac',
    '.ogg': 'audio/ogg',
    '.flac': 'audio/flac',
}

# Inline request payloads are size-limited by the API; stay well below the
# limit and switch to the Files API for anything larger.
_MAX_INLINE_AUDIO_BYTES = 15 * 1024 * 1024


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the Gemini model.

    The audio is sent together with a transcription instruction. Small files
    are embedded inline as a ``{"mime_type", "data"}`` blob; larger files are
    uploaded through the Files API first and removed again afterwards.

    Returns the transcript text from the model response.
    """
    import mimetypes

    extension = os.path.splitext(file_path)[1].lower()
    # Prefer the documented Gemini MIME names, then the stdlib guess, and
    # finally MP3, which is what download_media assumes for generic audio.
    mime_type = (
        _GEMINI_AUDIO_MIME_TYPES.get(extension)
        or mimetypes.guess_type(file_path)[0]
        or 'audio/mp3'
    )
    prompt = (
        "Generate a verbatim transcript of the speech in this audio. "
        "Return only the transcript text."
    )

    if os.path.getsize(file_path) <= _MAX_INLINE_AUDIO_BYTES:
        with open(file_path, "rb") as audio_file:
            # Raw bytes are not a valid content part; wrap them in a blob dict.
            audio_part = {"mime_type": mime_type, "data": audio_file.read()}
        response = model.generate_content([prompt, audio_part])
        return response.text

    uploaded = genai.upload_file(path=file_path, mime_type=mime_type)
    try:
        response = model.generate_content([prompt, uploaded])
    finally:
        # Do not keep user media on the service longer than necessary.
        genai.delete_file(uploaded.name)
    return response.text

def process_media(contents, filename, url):
    """Turn an uploaded file or a media URL into a transcript.

    Args:
        contents: data-URL string from ``dcc.Upload`` (``"<header>,<base64>"``),
            or a falsy value when nothing was uploaded.
        filename: original name of the uploaded file; only its extension is
            used. May be ``None``.
        url: media URL to download when no upload is given.

    Returns:
        The transcript text.

    Raises:
        ValueError: if neither *contents* nor *url* is provided, or if
            *contents* is not a ``<header>,<payload>`` data URL.
    """
    if contents:
        _header, separator, content_string = contents.partition(',')
        if not separator:
            raise ValueError("Uploaded file contents are not a valid data URL")
        decoded = base64.b64decode(content_string)
        # Keep the original extension so the video/audio branch below works.
        suffix = os.path.splitext(filename or '')[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(decoded)
            temp_file_path = temp_file.name
    elif url:
        temp_file_path = download_media(url)
    else:
        raise ValueError("No input provided")

    try:
        if temp_file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv', '.wmv')):
            audio_file_path = extract_audio(temp_file_path)
            try:
                transcript = transcribe_audio(audio_file_path)
            finally:
                # Remove the extracted WAV even when transcription fails.
                os.unlink(audio_file_path)
        else:
            transcript = transcribe_audio(temp_file_path)
    finally:
        os.unlink(temp_file_path)

    return transcript

def _build_layout():
    """Assemble the single-page layout: upload box, URL field, buttons and output area."""
    # Dashed drop zone for a single audio/video file.
    upload_box = dcc.Upload(
        id='upload-media',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Audio/Video File'),
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px',
        },
        multiple=False,
    )

    url_field = dbc.Input(
        id="media-url",
        type="text",
        placeholder="Enter audio/video URL or YouTube link",
        className="my-3",
    )

    transcribe_button = dbc.Button(
        "Transcribe",
        id="transcribe-button",
        color="primary",
        className="w-100 mb-3",
    )

    # The spinner wraps the output container that the transcription callback fills.
    output_area = dbc.Spinner(html.Div(id="transcription-output", className="mt-3"))

    # Starts hidden; the transcription callback controls its "style" property.
    download_button = dbc.Button(
        "Download Transcript",
        id="download-button",
        color="secondary",
        className="w-100 mt-3",
        style={'display': 'none'},
    )

    controls = dbc.CardBody([
        upload_box,
        url_field,
        transcribe_button,
        output_area,
        download_button,
        dcc.Download(id="download-transcript"),
    ])

    return dbc.Container([
        html.H1("Audio/Video Transcription App", className="text-center my-4"),
        dbc.Card([controls]),
    ])


app.layout = _build_layout()

@app.callback(
    Output("transcription-output", "children"),
    Output("download-button", "style"),
    Input("transcribe-button", "n_clicks"),
    State("upload-media", "contents"),
    State("upload-media", "filename"),
    State("media-url", "value"),
    prevent_initial_call=True
)
def update_transcription(n_clicks, contents, filename, url):
    """Run the transcription for the uploaded file or URL and render the result.

    Returns a ``(children, style)`` pair: the content for the
    ``transcription-output`` container and the style of the download button,
    which is shown only when a transcript was produced.

    Raises:
        PreventUpdate: when neither an upload nor a URL was provided.
    """
    if not contents and not url:
        raise PreventUpdate

    # The worker reports back through this dict; a Thread does not keep the
    # return value of its target, so it has to be stored explicitly.
    outcome = {}

    def transcribe():
        try:
            outcome["transcript"] = process_media(contents, filename, url)
        except Exception as e:
            outcome["error"] = f"An error occurred: {str(e)}"

    # Daemon thread: a worker that outlives the timeout must not block shutdown.
    thread = threading.Thread(target=transcribe, daemon=True)
    thread.start()
    thread.join(timeout=600)  # 10 minutes timeout

    hidden = {'display': 'none'}

    if thread.is_alive():
        return "Transcription timed out after 10 minutes", hidden

    if "error" in outcome:
        return outcome["error"], hidden

    transcript = outcome.get("transcript")
    if not transcript:
        return "Transcription failed", hidden

    # download_transcript reads the text back from this exact structure
    # (Card -> CardBody -> [H5, Pre]); keep the nesting unchanged.
    return dbc.Card([
        dbc.CardBody([
            html.H5("Transcription Result"),
            # Dash style dictionaries use camelCased CSS property names.
            html.Pre(transcript, style={"whiteSpace": "pre-wrap", "wordWrap": "break-word"})
        ])
    ]), {'display': 'block'}

@app.callback(
    Output("download-transcript", "data"),
    Input("download-button", "n_clicks"),
    State("transcription-output", "children"),
    prevent_initial_call=True
)
def download_transcript(n_clicks, transcription_output):
    """Send the currently displayed transcript to the browser as ``transcript.txt``.

    Raises:
        PreventUpdate: when there is no output, or the output is not the
            result card (for example a plain error or timeout message).
    """
    if not transcription_output:
        raise PreventUpdate

    # On success update_transcription renders Card -> CardBody -> [H5, Pre];
    # the transcript is the Pre's children. Anything else (e.g. a message
    # string) has no transcript to download.
    try:
        transcript = transcription_output['props']['children'][0]['props']['children'][1]['props']['children']
    except (KeyError, IndexError, TypeError):
        raise PreventUpdate from None

    if not isinstance(transcript, str):
        raise PreventUpdate

    return dict(content=transcript, filename="transcript.txt")

if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server listens on all interfaces, so debug mode (which exposes the
    # interactive debugger) must be opt-in: set DASH_DEBUG=1 for local work.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")