Voxtral WebGPU

State-of-the-art audio transcription directly in your browser.

You are about to load Voxtral-Mini, a 4.68B parameter model, optimized for inference on the web.

Everything runs entirely in your browser with Transformers.js and ONNX Runtime Web, meaning no data is sent to a server.

Get started by clicking the button below.

{audioSaveError && (

{audioSaveError}

)} {!viewedTranscription ? (

Select a transcription

Choose an item from the history or add a new file to begin.

) : (

{editingFilename ? ( setViewedTranscription({ ...viewedTranscription, filename: e.target.value })} onBlur={() => { setEditingFilename(false); updateFilename(viewedTranscription.id, viewedTranscription.filename); }} onKeyDown={(e) => { if (e.key === "Enter" || e.key === "Escape") { e.preventDefault(); setEditingFilename(false); updateFilename(viewedTranscription.id, viewedTranscription.filename); } }} /> ) : (

setEditingFilename(true)} > {viewedTranscription.filename}

)}

{viewedTranscription.date}

updateTranscriptionText(viewedTranscription.id, e.target.value)}
                      readOnly={!!isProcessing}
                    />
                  </div>
                  {isProcessing && (
                    <div className="absolute inset-0 flex items-center justify-center bg-gray-900/90 rounded-lg z-10">
                      <div className="flex flex-col items-center text-gray-400 relative">
                        <span className="relative flex h-10 w-10">
                          <span className="relative inline-flex rounded-full h-10 w-10 items-center justify-center animate-spin-slow">
                            <svg className="h-7 w-7 text-purple-400" viewBox="0 0 24 24" fill="none">
                              <circle
                                className="opacity-30"
                                cx="12"
                                cy="12"
                                r="10"
                                stroke="currentColor"
                                strokeWidth="4"
                              />
                              <path
                                d="M22 12a10 10 0 00-10-10"
                                stroke="currentColor"
                                strokeWidth="4"
                                strokeLinecap="round"
                                className="text-purple-500"
                              />
                            </svg>
                          </span>
                        </span>
                        <span className="mt-2 pointer-events-none">
                          {transcription.length === 0 ? "Processing audio..." : "Transcribing..."}
                        </span>
                      </div>
                    </div>
                  )}
                </div>
                <div className="flex justify-between mt-4">
                  {isProcessing ? (
                    <button
                      className="bottom-8 left-8 z-20 px-4 py-2 bg-gray-800 text-red-400 rounded shadow-lg transition-colors text-base font-medium flex items-center gap-2 cursor-pointer hover:bg-red-900 hover:text-white"
                      title="Stop transcription"
                      onClick={stopTranscription}
                    >
                      <svg className="w-5 h-5" fill="none" stroke="currentColor" strokeWidth="2" viewBox="0 0 24 24">
                        <rect x="6" y="6" width="12" height="12" rx="2" fill="currentColor" />
                      </svg>
                      Stop
                    </button>
                  ) : (
                    <span />
                  )}
                  <button
                    className={`bottom-8 right-8 z-20 px-4 py-2 bg-gray-800 text-gray-200 rounded shadow-lg transition-colors text-base font-medium flex items-center gap-2 cursor-pointer disabled:opacity-50 disabled:cursor-not-allowed${
                      viewedTranscription.text ? " hover:bg-purple-700" : ""
                    }`}
                    title="Download transcript"
                    disabled={!viewedTranscription.text}
                    onClick={() => {
                      const baseName = viewedTranscription.filename;
                      const filename = `${baseName}.txt`;
                      const blob = new Blob([viewedTranscription.text ?? ""], { type: "text/plain" });
                      const url = URL.createObjectURL(blob);
                      const a = document.createElement("a");
                      a.href = url;
                      a.download = filename;
                      document.body.appendChild(a);
                      a.click();
                      setTimeout(() => {
                        URL.revokeObjectURL(url);
                        document.body.removeChild(a);
                      }, 100);
                    }}
                  >
                    <svg className="w-5 h-5" fill="none" stroke="currentColor" strokeWidth="2" viewBox="0 0 24 24">
                      <path
                        strokeLinecap="round"
                        strokeLinejoin="round"
                        d="M4 16v2a2 2 0 002 2h12a2 2 0 002-2v-2M7 10l5 5 5-5M12 15V3"
                      />
                    </svg>
                    Download
                  </button>
                </div>
              </div>
            )}
          </div>
        </div>
      </main>
    </div>
  );
}

Voxtral WebGPU

Loading Voxtral Model...

Select a transcription

setEditingFilename(true)} > {viewedTranscription.filename}