File size: 2,638 Bytes
5675d05
 
 
 
 
 
 
bd51214
 
5675d05
 
bd51214
5675d05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd51214
 
 
5675d05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd51214
5675d05
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import io
import os  
import shutil
import subprocess
import requests
import uuid
from smolagents import tool
import dotenv
dotenv.load_dotenv()

@tool
def get_text_transcript_from_audio_file(file_url: str, language: str = "en-US") -> str:
    """
    Convert speech to text using a local whisper model.

    This function downloads an audio file from a given URL, converts it to a
    mono 16 kHz WAV file with ffmpeg, runs the go-whisper Docker image on it,
    and returns the content of the text file found next to the audio afterwards.

    Working files live under the folder named by the STT_FOLDER environment
    variable (current directory when unset): ``tmp/`` for the download and the
    converted WAV, ``testdata/`` for the file handed to the container, and
    ``models/`` for the whisper model mounted into the container.

    Args:
        file_url (str): The URL of the audio file to transcribe.
        language (str): The language code for the transcription. Default is "en-US".

    Returns:
        str: The transcribed text.

    Raises:
        RuntimeError: If the download does not answer with HTTP 200.
        subprocess.CalledProcessError: If ffmpeg or docker exits with a non-zero status.
    """
    # NOTE(review): `language` is accepted for interface compatibility but is
    # not forwarded to whisper anywhere below -- confirm whether it should be.

    # Resolve once to an absolute path: the subprocesses below run with
    # cwd=work_dir, so a relative STT_FOLDER would otherwise be applied twice,
    # and absolute host paths are the unambiguous form for docker bind mounts.
    work_dir = os.path.abspath(os.getenv("STT_FOLDER") or ".")
    tmp_dir = os.path.join(work_dir, "tmp")
    data_dir = os.path.join(work_dir, "testdata")
    models_dir = os.path.join(work_dir, "models")
    # Create the scratch folders up front so open()/copy2() cannot fail on a
    # fresh checkout. The models folder is deliberately not created: it has to
    # be provisioned with the model file beforehand.
    os.makedirs(tmp_dir, exist_ok=True)
    os.makedirs(data_dir, exist_ok=True)

    # A random stem keeps concurrent or repeated calls from clobbering each other.
    stem = uuid.uuid4().hex
    mp3_path = os.path.join(tmp_dir, stem + ".mp3")
    wav_name = stem + ".wav"
    wav_path = os.path.join(tmp_dir, wav_name)

    # 1. download the file from url (in pure python without wget or curl)
    # The timeout prevents an unresponsive server from blocking the caller forever.
    response = requests.get(file_url, timeout=60)
    if response.status_code != 200:
        raise RuntimeError(f"Error downloading file: {response.status_code}")
    with open(mp3_path, "wb") as f:
        f.write(response.content)

    # 2. convert the download to mono 16 kHz 16-bit PCM WAV with ffmpeg.
    # The download is always stored with an .mp3 name, so this always runs.
    # Arguments are passed as a list so paths containing spaces stay intact.
    ffmpeg_cmd = [
        "ffmpeg",
        "-i", mp3_path,
        "-ac", "1",
        "-ar", "16000",
        "-c:a", "pcm_s16le",
        wav_path,
    ]
    subprocess.run(ffmpeg_cmd, cwd=work_dir, check=True)

    # 3. copy file to data folder (the directory mounted into the container)
    shutil.copy2(wav_path, data_dir)

    # 4. call docker run command
    docker_cmd = [
        "docker", "run",
        "-v", f"{models_dir}:/app/models",
        "-v", f"{data_dir}:/app/testdata",
        "ghcr.io/appleboy/go-whisper:latest",
        "--model", "/app/models/ggml-small.bin",
        "--audio-path", f"/app/testdata/{wav_name}",
    ]
    subprocess.run(docker_cmd, cwd=work_dir, check=True)

    # 5. read the output file and return it
    # The transcript is expected at testdata/<stem>.txt; decode explicitly as
    # UTF-8 so non-ASCII text does not depend on the platform default encoding.
    output_filepath = os.path.join(data_dir, stem + ".txt")
    with open(output_filepath, "r", encoding="utf-8") as f:
        return f.read()

if __name__ == "__main__":
    # Manual smoke test: transcribe one sample audio file and print the result.
    sample_url = "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3"
    print(get_text_transcript_from_audio_file(sample_url))