Spaces:

EnDevSols
/

ASR-Arabic-JS

Running

File size: 4,560 Bytes

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Transcription and Similarity Checker</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            padding: 20px;
        }
        .container {
            max-width: 700px;
            margin: 0 auto;
            background: #fff;
            padding: 20px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        h1 {
            text-align: center;
        }
        .button {
            background-color: #e8b62c;
            color: white;
            padding: 10px 20px;
            text-align: center;
            cursor: pointer;
            border: none;
            margin-top: 10px;
            display: block;
            width: 100%;
        }
        .audio-upload {
            margin-top: 20px;
            text-align: center;
        }
        .result {
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Audio Transcription and Similarity Checker</h1>
        <div id="original-audio" class="audio-upload">
            <h2>Upload Original Audio</h2>
            <input type="file" id="originalFile" accept="audio/*">
        </div>

        <div id="user-audio" class="audio-upload">
            <h2>Upload User Audio</h2>
            <input type="file" id="userFile" accept="audio/*">
        </div>

        <button id="transcribeButton" class="button">Perform Transcription and Testing</button>

        <div id="result" class="result"></div>
    </div>

    <!-- NOTE(review): the inline script below reads a global `transformers` object.
         Confirm that this CDN URL serves a build that defines that global when it is
         loaded as a classic (non-module) script; if it does not, model loading will fail. -->
    <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers"></script>
    <script>
        // Hub identifier passed to both `from_pretrained` calls in loadModel().
        // NOTE(review): presumably the repository must ship weights in a format the
        // in-browser runtime can load — TODO confirm for this checkpoint.
        const MODEL_ID = "facebook/wav2vec2-large-960h";  // Sample model, change if necessary
        // Assigned asynchronously by loadModel(); both are undefined until then.
        let processor, model;

        /**
         * Loads the processor and the CTC model for MODEL_ID and stores them in the
         * shared `processor` / `model` variables.
         *
         * The two loads are independent, so they are started together, and the shared
         * variables are only assigned once both have succeeded — a failed load never
         * leaves one of them set while the other is still undefined.
         *
         * The library namespace is taken from a global `transformers` object when the
         * page provides one; otherwise it is imported as an ES module from the CDN.
         *
         * @returns {Promise<void>} Resolves once both objects are ready.
         * @throws Rejects with the underlying error if the library or either load fails.
         */
        async function loadModel() {
            // `globalThis.transformers` (unlike a bare `transformers`) does not throw a
            // ReferenceError when the global is missing, so the fallback can kick in.
            const library = globalThis.transformers
                ?? await import("https://cdn.jsdelivr.net/npm/@huggingface/transformers");

            const [loadedProcessor, loadedModel] = await Promise.all([
                library.AutoProcessor.from_pretrained(MODEL_ID),
                library.Wav2Vec2ForCTC.from_pretrained(MODEL_ID),
            ]);

            processor = loadedProcessor;
            model = loadedModel;
        }

        /**
         * Decodes an encoded audio file into mono PCM samples at the given rate.
         *
         * The audio context is created with the requested sample rate so that the
         * browser resamples while decoding; it is always closed afterwards.
         *
         * @param {ArrayBuffer} encodedBytes - Raw bytes of the uploaded file.
         * @param {number} sampleRate - Target sample rate in Hz.
         * @returns {Promise<Float32Array>} Channel-averaged samples.
         */
        async function decodeAudioToMono(encodedBytes, sampleRate) {
            const audioContext = new globalThis.AudioContext({ sampleRate });
            try {
                const decoded = await audioContext.decodeAudioData(encodedBytes);
                const mono = new Float32Array(decoded.length);
                for (let channel = 0; channel < decoded.numberOfChannels; channel++) {
                    const samples = decoded.getChannelData(channel);
                    for (let i = 0; i < mono.length; i++) {
                        mono[i] += samples[i] / decoded.numberOfChannels;
                    }
                }
                return mono;
            } finally {
                await audioContext.close();
            }
        }

        /**
         * Picks the highest-scoring vocabulary id for every frame of the first batch item.
         *
         * Assumes `logits` is a (batch, frames, vocab) tensor exposing `dims` and a flat,
         * row-major `data` buffer — TODO confirm against the library version in use.
         * The argmax is computed by hand from that buffer rather than relying on a
         * tensor-level argmax along an axis.
         *
         * @param {{dims: number[], data: ArrayLike<number>}} logits - Model output scores.
         * @returns {number[]} One token id per frame.
         */
        function argmaxPerFrame(logits) {
            const vocabSize = logits.dims.at(-1);
            const frameCount = logits.dims.at(-2);
            const scores = logits.data;
            const ids = [];
            for (let frame = 0; frame < frameCount; frame++) {
                const offset = frame * vocabSize;
                let bestId = 0;
                for (let id = 1; id < vocabSize; id++) {
                    if (scores[offset + id] > scores[offset + bestId]) {
                        bestId = id;
                    }
                }
                ids.push(bestId);
            }
            return ids;
        }

        /**
         * Transcribes an uploaded audio file with the loaded processor and CTC model.
         *
         * The file is decoded to PCM first: its raw bytes are an encoded container
         * (WAV, MP3, ...), not samples, so they cannot be viewed as a Float32Array.
         *
         * @param {Blob} audioFile - Encoded audio file, e.g. from an <input type="file">.
         * @returns {Promise<string>} The decoded transcription.
         * @throws {Error} If the model has not finished loading, or if the browser
         *     cannot decode the file.
         */
        async function transcribe(audioFile) {
            if (!processor || !model) {
                throw new Error("The speech model is not loaded yet. Please try again in a moment.");
            }

            // Presumably the feature extractor publishes the rate the model expects;
            // 16 kHz is used when it does not — TODO confirm for the chosen checkpoint.
            const sampleRate = processor.feature_extractor?.config?.sampling_rate ?? 16000;

            const encodedBytes = await audioFile.arrayBuffer();
            const audioData = await decodeAudioToMono(encodedBytes, sampleRate);

            // Both calls are asynchronous: await the whole call before reading a property.
            const inputs = await processor(audioData);
            const { logits } = await model(inputs);

            const predictedIds = argmaxPerFrame(logits);
            return processor.decode(predictedIds, { skip_special_tokens: true });
        }

        /**
         * Computes the Levenshtein edit distance between two strings.
         *
         * Strings are compared code point by code point, and only two rows of the
         * dynamic-programming table are kept at a time.
         *
         * @param {string} a - First string.
         * @param {string} b - Second string.
         * @returns {number} Minimum number of insertions, deletions and substitutions.
         */
        function levenshteinDistance(a, b) {
            const source = Array.from(a);
            const target = Array.from(b);
            let previousRow = Array.from({ length: target.length + 1 }, (_, j) => j);
            for (let i = 1; i <= source.length; i++) {
                const currentRow = [i];
                for (let j = 1; j <= target.length; j++) {
                    currentRow[j] = source[i - 1] === target[j - 1]
                        ? previousRow[j - 1]
                        : Math.min(previousRow[j], currentRow[j - 1], previousRow[j - 1]) + 1;
                }
                previousRow = currentRow;
            }
            return previousRow[target.length];
        }

        /**
         * Normalised similarity in [0, 1]: 1 minus the edit distance divided by the
         * length of the longer string. Two empty strings count as identical (1)
         * instead of producing NaN from 0 / 0.
         *
         * @param {string} a - First string.
         * @param {string} b - Second string.
         * @returns {number} Similarity score.
         */
        function levenshteinSimilarity(a, b) {
            const longest = Math.max(Array.from(a).length, Array.from(b).length);
            if (longest === 0) {
                return 1;
            }
            return 1 - levenshteinDistance(a, b) / longest;
        }

        /**
         * Replaces the contents of `container` with a heading and one labelled
         * paragraph per row. Values are inserted as text, never parsed as HTML.
         *
         * @param {Element} container - Element that receives the results.
         * @param {Array<[string, string]>} rows - Label/value pairs to display.
         */
        function renderTranscriptionResults(container, rows) {
            const heading = document.createElement("h2");
            heading.textContent = "Transcription Results";
            const paragraphs = rows.map(([label, value]) => {
                const paragraph = document.createElement("p");
                const strong = document.createElement("strong");
                strong.textContent = `${label}:`;
                paragraph.append(strong, ` ${value}`);
                return paragraph;
            });
            container.replaceChildren(heading, ...paragraphs);
        }

        /**
         * Click handler: transcribes both selected files, scores their similarity
         * and shows the outcome (or the failure reason) in the #result element.
         * The button is disabled while work is in progress to prevent overlapping runs.
         *
         * @param {Event} event - The click event from #transcribeButton.
         * @returns {Promise<void>}
         */
        async function handleTranscribeClick(event) {
            const button = event.currentTarget;
            const resultBox = document.getElementById("result");
            const originalFile = document.getElementById("originalFile").files[0];
            const userFile = document.getElementById("userFile").files[0];

            if (!originalFile || !userFile) {
                alert("Please upload both audio files.");
                return;
            }

            button.disabled = true;
            resultBox.textContent = "Transcribing...";
            try {
                // Run one after the other: both calls share the same model instance.
                const transcriptionOriginal = await transcribe(originalFile);
                const transcriptionUser = await transcribe(userFile);
                const similarityScore = levenshteinSimilarity(transcriptionOriginal, transcriptionUser);

                renderTranscriptionResults(resultBox, [
                    ["Original Transcription", transcriptionOriginal],
                    ["User Transcription", transcriptionUser],
                    ["Levenshtein Similarity Score", similarityScore.toFixed(2)],
                ]);
            } catch (error) {
                console.error(error);
                resultBox.textContent = `Transcription failed: ${error?.message ?? error}`;
            } finally {
                button.disabled = false;
            }
        }

        document.getElementById("transcribeButton").addEventListener("click", handleTranscribeClick);

        // Start loading as soon as the script runs. The promise is deliberately not
        // awaited, so a failure is handled here instead of becoming an unhandled rejection.
        loadModel().catch((error) => {
            console.error("Failed to load the speech model:", error);
            document.getElementById("result").textContent =
                "Failed to load the speech model. Check the console for details and reload the page.";
        });
    </script>
</body>
</html>