File size: 2,137 Bytes
38818c3
 
 
 
 
0f60365
 
 
38818c3
 
 
0f60365
38818c3
0f60365
38818c3
 
0f60365
 
 
 
 
 
 
 
 
 
 
 
 
 
38818c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash

# Script to download MMS model files on container startup
# Models will be cached in the mounted models directory for subsequent runs

# Use the MODELS_DIR environment variable set by run.sh
# Falls back to default if not set
MODELS_DIR="${MODELS_DIR:-/home/user/app/models}"

echo "Checking and downloading MMS models to $MODELS_DIR..."

# Create the models directory if it doesn't exist. A failure here is not
# handled separately: the cd below fails right after and prints diagnostics.
mkdir -p -- "$MODELS_DIR"
# Best effort: chmod reports its own error if it fails; whether the directory
# is actually usable is decided by the write check further down.
chmod 755 -- "$MODELS_DIR"

# Change to models directory; all downloads use paths relative to it.
cd "$MODELS_DIR" || {
    {
        echo "βœ— Failed to change to models directory: $MODELS_DIR"
        echo "Current user: $(whoami)"
        # Nested substitution is quoted so a parent path containing spaces
        # is passed to ls as a single argument.
        echo "Directory permissions: $(ls -la -- "$(dirname -- "$MODELS_DIR")")"
    } >&2
    exit 1
}

# Check if we can write to the directory. The test targets "." because the
# cd above already succeeded; re-resolving a relative MODELS_DIR from inside
# the directory would point at a non-existent nested path.
if [ ! -w . ]; then
    {
        echo "βœ— No write permission to models directory: $MODELS_DIR"
        echo "Current user: $(whoami)"
        # -d shows the directory's own mode/owner instead of its contents.
        echo "Directory permissions: $(ls -ld -- "$PWD")"
    } >&2
    exit 1
fi

#######################################
# Download a file unless a usable copy is already present.
#
# The payload is written to "<filename>.part" first and renamed to its final
# name only after wget succeeded and produced a non-empty file. wget -O
# creates its output file even when the transfer fails, so writing straight
# to the final name would leave a truncated or empty file that a later run
# mistakes for a finished download.
#
# Arguments:
#   $1 - URL to fetch
#   $2 - destination filename
# Outputs:
#   Progress messages on stdout; the failure message on stderr.
# Returns:
#   0 when the file is present afterwards. Calls `exit 1` when the download
#   fails.
#######################################
download_if_missing() {
    local url="$1"
    local filename="$2"
    local partial="${filename}.part"

    # A zero-byte file counts as missing: it is what a failed `wget -O`
    # leaves behind, not a valid model file.
    if [ -f "$filename" ] && [ -s "$filename" ]; then
        echo "βœ“ $filename already exists, skipping download"
        return 0
    fi

    echo "Downloading $filename..."
    if wget -O "$partial" "$url" \
        && [ -s "$partial" ] \
        && mv -f -- "$partial" "$filename"; then
        echo "βœ“ Successfully downloaded $filename"
    else
        # Drop the incomplete payload so it can never be picked up later.
        rm -f -- "$partial"
        echo "βœ— Failed to download $filename" >&2
        exit 1
    fi
}

# CTC alignment model files, stored as consecutive (URL, local filename) pairs.
echo "Downloading CTC alignment model files..."
ctc_alignment_assets=(
    "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt" "ctc_alignment_mling_uroman_model_dict.txt"
    "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt" "ctc_alignment_mling_uroman_model.pt"
)
for ((asset_idx = 0; asset_idx < ${#ctc_alignment_assets[@]}; asset_idx += 2)); do
    download_if_missing "${ctc_alignment_assets[asset_idx]}" "${ctc_alignment_assets[asset_idx + 1]}"
done

# Tokenizer and language model, in the same (URL, local filename) layout.
echo "Downloading tokenizer and language model..."
tokenizer_lm_assets=(
    "https://dl.fbaipublicfiles.com/mms/mms_1143_langs_tokenizer_spm.model" "mms_1143_langs_tokenizer_spm.model"
    "https://dl.fbaipublicfiles.com/mms/mms_XRI.pt" "mms_XRI.pt"
)
for ((asset_idx = 0; asset_idx < ${#tokenizer_lm_assets[@]}; asset_idx += 2)); do
    download_if_missing "${tokenizer_lm_assets[asset_idx]}" "${tokenizer_lm_assets[asset_idx + 1]}"
done

# Reached only when every download_if_missing call above returned.
echo "All model files are ready!"