File size: 2,722 Bytes
e039dbd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import requests
import json
import uuid
from moviepy import VideoFileClip

def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new audio file.

    The output is written to the current working directory under a random
    name of the form ``extracted_audio_<8 hex chars>.<output_format>``.

    Args:
        video_path: Path of the video to read. A falsy value (``None`` or
            ``""``) is rejected before anything is opened.
        output_format: Extension used for the output file name.

    Returns:
        A ``(output_path, message)`` tuple. On success ``output_path`` is the
        name of the written audio file; on any failure it is ``None`` and
        ``message`` describes what went wrong. Exceptions are never
        propagated to the caller.
    """
    if not video_path:
        return None, "No video provided"

    output_path = f"extracted_audio_{uuid.uuid4().hex[:8]}.{output_format}"

    try:
        video = VideoFileClip(video_path)
        try:
            # A clip without an audio track exposes ``audio`` as None; report
            # that explicitly instead of leaking an AttributeError message.
            if video.audio is None:
                return None, "Error extracting audio: video has no audio track"
            video.audio.write_audiofile(output_path)
            return output_path, "Audio extracted successfully"
        finally:
            # Always release the clip's underlying reader, including when
            # writing the audio fails part-way through.
            video.close()
    except Exception as e:
        return None, f"Error extracting audio: {str(e)}"

def transcribe_audio(audio_file, api_key, model_id="scribe_v1", timeout=(10, None)):
    """Upload an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` request header.
        model_id: Value sent as the ``model_id`` multipart form field.
        timeout: Passed straight to ``requests.post``. The default bounds only
            connection establishment (10 seconds) and leaves the read
            unbounded, so a long-running transcription is not cut off.

    Returns:
        The decoded JSON body of the response on success. On any failure
        (missing key, unreadable file, HTTP/network error, undecodable body)
        a dict of the form ``{"error": <message>}`` is returned instead of
        raising.
    """
    if not api_key:
        return {"error": "Please provide an API key"}
    if not audio_file:
        return {"error": "No audio file provided"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {
        "xi-api-key": api_key
    }

    # Open the file under its own guard: requests' exceptions derive from
    # OSError too, so sharing one handler would mislabel network failures.
    try:
        audio = open(audio_file, "rb")
    except OSError as e:
        return {"error": f"Could not open audio file: {str(e)}"}

    with audio:
        files = {
            "file": audio,
            "model_id": (None, model_id)
        }
        try:
            response = requests.post(url, headers=headers, files=files, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return {"error": f"API request failed: {str(e)}"}

    # Decode separately: both json.JSONDecodeError and requests' own
    # JSONDecodeError are ValueError subclasses, and the latter is also a
    # RequestException, which would otherwise be reported as a request failure.
    try:
        return response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}

def save_transcription(transcription):
    """Persist the text of a transcription result to a new UTF-8 text file.

    Args:
        transcription: Mapping describing a transcription result. If it
            contains an ``"error"`` key, nothing is written and that error
            is handed back. Otherwise its ``"text"`` entry is written,
            falling back to ``'No text found'`` when the key is absent.

    Returns:
        A ``(filename, message)`` tuple. ``filename`` is the randomly named
        ``transcription_<8 hex chars>.txt`` file in the current working
        directory, or ``None`` when an error was passed in or writing failed.
    """
    if "error" in transcription:
        return None, transcription["error"]

    target_name = "transcription_" + uuid.uuid4().hex[:8] + ".txt"

    try:
        with open(target_name, "w", encoding="utf-8") as handle:
            handle.write(transcription.get('text', 'No text found'))
    except Exception as exc:
        return None, f"Error saving transcription: {str(exc)}"
    else:
        return target_name, "Transcription saved as text file"

def process_video_file(video_file, output_format, elevenlabs_api_key, model_id):
    """Run the full pipeline: extract audio, transcribe it, save the transcript.

    Args:
        video_file: Path of the video to process, or ``None`` when nothing
            was supplied.
        output_format: Forwarded to ``extract_audio``.
        elevenlabs_api_key: Forwarded to ``transcribe_audio`` as the API key.
        model_id: Forwarded to ``transcribe_audio``.

    Returns:
        A 6-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text, None)``. The last slot is always
        ``None``. When no video is given or audio extraction fails, the path
        and text slots are ``None``. When extraction succeeds but the
        transcript could not be saved, ``transcript_file`` is ``None`` and
        ``transcript_text`` is the empty string.
    """
    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None, None

    extracted_path, extraction_status = extract_audio(video_file, output_format)

    # Bail out early unless extraction reported a path that really exists.
    if not (extracted_path and os.path.exists(extracted_path)):
        return None, extraction_status, None, "Audio extraction failed, cannot transcribe", None, None

    api_result = transcribe_audio(extracted_path, elevenlabs_api_key, model_id)
    saved_path, save_status = save_transcription(api_result)

    if saved_path:
        # Read the text back from the file that was just written.
        with open(saved_path, "r", encoding="utf-8") as handle:
            text = handle.read()
    else:
        text = ""

    return extracted_path, extraction_status, saved_path, save_status, text, None