import { useState, useRef, useEffect, useCallback, useMemo } from "react";
import { useVoxtral } from "./useVoxtral";
import HfIcon from "./HfIcon";

/** One history entry: metadata for an uploaded file plus its transcription text. */
type Transcription = {
  id: string;
  filename: string;
  date: string;
  /** `null` until a transcription result has been stored for the entry. */
  text: string | null;
  /** Key of the matching record in the audio object store, when one was recorded. */
  audioKey?: string;
  language: string;
};

type Screen = "intro" | "loading" | "main";

const LANGUAGES = [
  { code: "en", label: "English", icon: "🇬🇧" },
  { code: "fr", label: "Français", icon: "🇫🇷" },
  { code: "de", label: "Deutsch", icon: "🇩🇪" },
  { code: "es", label: "Español", icon: "🇪🇸" },
  { code: "it", label: "Italiano", icon: "🇮🇹" },
  { code: "pt", label: "Português", icon: "🇵🇹" },
  { code: "nl", label: "Nederlands", icon: "🇳🇱" },
  { code: "hi", label: "हिन्दी", icon: "🇮🇳" },
];

const DB_NAME = "voxtral-db";
const DB_VERSION = 1;
const HISTORY_STORE = "history";
const AUDIO_STORE = "audio";

/** Memoized connection promise so the database is opened at most once at a time. */
let dbPromise: Promise<IDBDatabase> | null = null;

/**
 * Opens (or reuses) the IndexedDB connection, creating the history and audio
 * object stores on first use.
 *
 * @returns The shared database connection.
 * @throws Rejects with the open request's error; the memoized promise is
 *   cleared first so a later call can retry.
 */
function getDB(): Promise<IDBDatabase> {
  if (!dbPromise) {
    dbPromise = new Promise<IDBDatabase>((resolve, reject) => {
      const req = indexedDB.open(DB_NAME, DB_VERSION);
      req.onupgradeneeded = () => {
        const db = req.result;
        if (!db.objectStoreNames.contains(HISTORY_STORE)) {
          db.createObjectStore(HISTORY_STORE, { keyPath: "id" });
        }
        if (!db.objectStoreNames.contains(AUDIO_STORE)) {
          db.createObjectStore(AUDIO_STORE, { keyPath: "key" });
        }
      };
      req.onsuccess = () => resolve(req.result);
      req.onerror = () => {
        // Drop the cached promise so the next caller re-attempts the open.
        dbPromise = null;
        reject(req.error);
      };
    });
  }
  return dbPromise;
}

/**
 * Wraps a single IndexedDB request in a promise.
 *
 * @param request The pending request.
 * @returns The request's result; rejects with the request's error.
 */
function promiseify<T>(request: IDBRequest<T>): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    request.onsuccess = () => resolve(request.result);
    request.onerror = () => reject(request.error);
  });
}

/**
 * Resolves once a transaction has committed.
 *
 * @param tx The transaction to wait for.
 * @throws Rejects when the transaction errors or aborts.
 */
function transactionPromise(tx: IDBTransaction): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    tx.oncomplete = () => resolve();
    tx.onerror = () => reject(tx.error);
    // A transaction can abort without an `error` event (e.g. the commit fails
    // because the storage quota is exceeded). Without this handler the promise
    // would never settle. Rejecting twice after an error is a harmless no-op.
    tx.onabort = () =>
      reject(tx.error ?? new DOMException("Transaction aborted", "AbortError"));
  });
}

/** Reads every stored history entry. */
async function getHistoryDB(): Promise<Transcription[]> {
  const db = await getDB();
  const store = db.transaction(HISTORY_STORE, "readonly").objectStore(HISTORY_STORE);
  const items: Transcription[] = await promiseify(store.getAll());
  return items;
}

/**
 * Upserts the given entries into the history store in one transaction.
 *
 * @param history Entries to write; existing records with the same `id` are replaced.
 */
async function saveHistoryDB(history: Transcription[]): Promise<void> {
  const db = await getDB();
  const tx = db.transaction(HISTORY_STORE, "readwrite");
  const store = tx.objectStore(HISTORY_STORE);
  for (const item of history) {
    store.put(item);
  }
  return transactionPromise(tx);
}

/** Deletes the history entry with the given `id`. */
async function removeHistoryItemDB(id: string): Promise<void> {
  const db = await getDB();
  const tx = db.transaction(HISTORY_STORE, "readwrite");
  tx.objectStore(HISTORY_STORE).delete(id);
  return transactionPromise(tx);
}

/**
 * Stores a file's bytes and MIME type under `key` in the audio store.
 *
 * @param key Record key.
 * @param file Source file.
 */
async function saveAudioToDB(key: string, file: File): Promise<void> {
  const db = await getDB();
  // Read the bytes before opening the transaction: the transaction is only
  // created once the (asynchronous) file read has finished.
  const arrayBuffer = await file.arrayBuffer();
  const tx = db.transaction(AUDIO_STORE, "readwrite");
  tx.objectStore(AUDIO_STORE).put({ key, buffer: arrayBuffer, type: file.type });
  return transactionPromise(tx);
}

/**
 * Loads stored audio and exposes it as an object URL.
 *
 * @param key Record key in the audio store.
 * @returns A URL created with `URL.createObjectURL` (revoke it when no longer
 *   needed), or `null` when the record is missing or the lookup fails.
 */
async function getAudioUrlFromDB(key: string): Promise<string | null> {
  try {
    const db = await getDB();
    const tx = db.transaction(AUDIO_STORE, "readonly");
    const result = await promiseify(tx.objectStore(AUDIO_STORE).get(key));
    if (result?.buffer) {
      const blob = new Blob([result.buffer], { type: result.type || "audio/wav" });
      return URL.createObjectURL(blob);
    }
    return null;
  } catch (error) {
    // Best-effort lookup: report the failure instead of hiding it, but still
    // fall back to "no audio".
    console.error("Failed to load audio from IndexedDB:", error);
    return null;
  }
}

/** Deletes the audio record stored under `key`; does nothing when `key` is empty. */
async function removeAudioFromDB(key?: string): Promise<void> {
  if (!key) return;
  const db = await getDB();
  const tx = db.transaction(AUDIO_STORE, "readwrite");
  tx.objectStore(AUDIO_STORE).delete(key);
  return transactionPromise(tx);
}

/**
 * Returns the audio-store key for an entry: its explicit `audioKey`, otherwise
 * one derived from its `id`, otherwise `undefined`.
 */
function inferAudioKey(item: Transcription): string | undefined {
  return item.audioKey ?? (item.id ? `voxtral_audio_${item.id}` : undefined);
}

/**
 * Decodes an audio file and, when needed, resamples it to `targetSampleRate`.
 *
 * @param file Audio (or other decodable media) file.
 * @param targetSampleRate Desired sample rate in Hz.
 * @returns The decoded buffer at the target rate, or `null` when decoding or
 *   resampling fails (the error is logged).
 */
async function fileToAudioBuffer(file: File, targetSampleRate: number): Promise<AudioBuffer | null> {
  let audioCtx: AudioContext | undefined;
  try {
    const arrayBuffer = await file.arrayBuffer();
    const AudioContextCtor =
      window.AudioContext ||
      (window as Window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;
    if (!AudioContextCtor) {
      throw new Error("Web Audio API is not available in this browser.");
    }
    audioCtx = new AudioContextCtor();
    const decoded = await audioCtx.decodeAudioData(arrayBuffer);
    if (decoded.sampleRate === targetSampleRate) {
      return decoded;
    }
    // Resample by rendering the decoded buffer through an offline context
    // running at the target rate.
    const offlineCtx = new OfflineAudioContext(
      decoded.numberOfChannels,
      Math.ceil(decoded.duration * targetSampleRate),
      targetSampleRate,
    );
    const src = offlineCtx.createBufferSource();
    src.buffer = decoded;
    src.connect(offlineCtx.destination);
    src.start();
    return await offlineCtx.startRendering();
  } catch (error) {
    console.error("Failed to decode or resample audio:", error);
    return null;
  } finally {
    // Always release the context, including on decode/render failure. Closing
    // is best-effort so a close error cannot discard an already decoded buffer.
    await audioCtx?.close().catch(() => undefined);
  }
}

export default function App() { const [screen, setScreen] = useState("intro"); const [history, setHistory] = useState([]); const [viewedTranscription, setViewedTranscription] = useState(null); const [pendingTranscriptionId, setPendingTranscriptionId] = useState(null); const [audioSaveError, setAudioSaveError] = useState(null); const [editingFilename, setEditingFilename] = useState(false); const [selectedLanguage, setSelectedLanguage] = useState("en"); const [search, setSearch] = useState(""); const [audioUrlCache, setAudioUrlCache] = useState>(new Map()); const { status, error, transcription, setTranscription, loadModel, transcribe, stopTranscription } = useVoxtral(); const fileInputRef = useRef(null); const filenameInputRef = useRef(null); const introRef = useRef(null); const urlCacheRef = useRef(audioUrlCache); const sortedHistory = useMemo(() => [...history].sort((a, b) => Number(b.id) - Number(a.id)), [history]); const currentTranscription = useMemo(() => { if (!viewedTranscription) return ""; if (pendingTranscriptionId === viewedTranscription.id) { return transcription; } return viewedTranscription.text; }, 
[viewedTranscription, pendingTranscriptionId, transcription]); const audioSrc = useMemo(() => { if (!viewedTranscription) return null; const key = inferAudioKey(viewedTranscription); return key ? (audioUrlCache.get(key) ?? null) : null; }, [viewedTranscription, audioUrlCache]); const filteredHistory = useMemo(() => { if (!search.trim()) return sortedHistory; const s = search.trim().toLowerCase(); return sortedHistory.filter( (item) => item.filename.toLowerCase().includes(s) || (item.text && item.text.toLowerCase().includes(s)), ); }, [sortedHistory, search]); const handleFile = useCallback( async (file: File) => { const id = Date.now().toString(); const audioKey = `voxtral_audio_${id}`; setAudioSaveError(null); const objectUrl = URL.createObjectURL(file); setAudioUrlCache((prev) => { if (prev.get(audioKey) === objectUrl) return prev; return new Map(prev).set(audioKey, objectUrl); }); const entry: Transcription = { id, filename: file.name.replace(/\.[^/.]+$/, ""), date: new Date().toLocaleString(), text: null, audioKey, language: selectedLanguage, }; setTranscription(""); setHistory((prev) => [entry, ...prev]); setViewedTranscription(entry); setPendingTranscriptionId(id); saveAudioToDB(audioKey, file).catch((e) => { console.error("DB save error:", e); setAudioSaveError("Failed to save to IndexedDB. Storage may be full."); }); saveHistoryDB([entry]).catch(() => {}); const audioBuffer = await fileToAudioBuffer(file, 16000); const result = audioBuffer ? await transcribe(audioBuffer.getChannelData(0), selectedLanguage) : "Failed to decode audio."; const finalEntry = { ...entry, text: result ?? "Transcription failed." }; setHistory((currentHistory) => { const finalHistory = currentHistory.map((h) => (h.id === id ? 
finalEntry : h)); saveHistoryDB(finalHistory); return finalHistory; }); setViewedTranscription(finalEntry); setPendingTranscriptionId(null); }, [transcribe, setTranscription, selectedLanguage], ); const deleteHistoryItem = useCallback( async (e: React.MouseEvent, item: Transcription) => { e.stopPropagation(); const isCurrentlyViewed = viewedTranscription?.id === item.id; const key = inferAudioKey(item); const newHistory = history.filter((h) => h.id !== item.id); setHistory(newHistory); if (isCurrentlyViewed) { setViewedTranscription(newHistory.length > 0 ? newHistory[0] : null); } await removeHistoryItemDB(item.id); await removeAudioFromDB(key); if (key) { setAudioUrlCache((prev) => { const newCache = new Map(prev); const urlToRevoke = newCache.get(key); if (urlToRevoke) { URL.revokeObjectURL(urlToRevoke); } newCache.delete(key); return newCache; }); } }, [history, viewedTranscription], ); const updateFilename = useCallback(async (id: string, newFilename: string) => { setHistory((prev) => { const updated = prev.map((h) => (h.id === id ? { ...h, filename: newFilename } : h)); saveHistoryDB(updated); return updated; }); }, []); const updateTranscriptionText = useCallback(async (id: string, newText: string) => { setViewedTranscription((prev) => (prev && prev.id === id ? { ...prev, text: newText } : prev)); setHistory((prev) => { const updated = prev.map((h) => (h.id === id ? 
{ ...h, text: newText } : h)); saveHistoryDB(updated); return updated; }); }, []); useEffect(() => { const input = document.createElement("input"); input.type = "file"; input.accept = "audio/*,video/*"; input.style.display = "none"; const handleChange = (e: Event) => { const target = e.target as HTMLInputElement; const file = target.files?.[0]; if (file) { handleFile(file); } target.value = ""; }; input.addEventListener("change", handleChange); document.body.appendChild(input); fileInputRef.current = input; return () => { input.removeEventListener("change", handleChange); document.body.removeChild(input); }; }, [handleFile]); useEffect(() => { (async () => { const hist = await getHistoryDB(); setHistory(hist); })(); }, []); useEffect(() => { if (!viewedTranscription) return; const key = inferAudioKey(viewedTranscription); if (!key || audioUrlCache.has(key)) { return; } let cancelled = false; (async () => { const url = await getAudioUrlFromDB(key); if (url && !cancelled) { setAudioUrlCache((prev) => new Map(prev).set(key, url)); } })(); return () => { cancelled = true; }; }, [viewedTranscription, audioUrlCache]); useEffect(() => { return () => { for (const url of urlCacheRef.current.values()) { URL.revokeObjectURL(url); } }; }, []); useEffect(() => { if (screen === "main" && sortedHistory.length > 0 && !viewedTranscription) { setViewedTranscription(sortedHistory[0]); } }, [screen, sortedHistory, viewedTranscription]); useEffect(() => { if (screen !== "intro" || !introRef.current) return; const ref = introRef.current; const handleMouseMove = (e: MouseEvent) => { const { clientX, clientY } = e; const { offsetWidth, offsetHeight } = ref; const x = (clientX / offsetWidth) * 100; const y = (clientY / offsetHeight) * 100; ref.style.setProperty("--mouse-x", `${x}%`); ref.style.setProperty("--mouse-y", `${y}%`); }; window.addEventListener("mousemove", handleMouseMove); return () => window.removeEventListener("mousemove", handleMouseMove); }, [screen]); useEffect(() => { if 
(editingFilename && filenameInputRef.current) { filenameInputRef.current.focus(); filenameInputRef.current.select(); } }, [editingFilename]); if (screen === "intro") { return (
Powered by{" "} Transformers.js

Voxtral WebGPU

State-of-the-art audio transcription directly in your browser.

You are about to load{" "} Voxtral-Mini , a 4.68B parameter model, optimized for inference on the web.

Everything runs entirely in your browser with Transformers.js and{" "} ONNX Runtime Web, meaning no data is sent to a server.

Get started by clicking the button below.

); } if (screen === "loading" || status === "loading") { return (

Loading Voxtral Model...

This may take some time to download on first load.
Afterwards, the model will be cached for future use.

{error && (
{error}
)}
); } const isProcessing = pendingTranscriptionId && pendingTranscriptionId === viewedTranscription?.id; return (
{audioSaveError && (
{audioSaveError}
)} {!viewedTranscription ? (

Select a transcription

Choose an item from the history or add a new file to begin.

) : (
{editingFilename ? ( setViewedTranscription({ ...viewedTranscription, filename: e.target.value })} onBlur={() => { setEditingFilename(false); updateFilename(viewedTranscription.id, viewedTranscription.filename); }} onKeyDown={(e) => { if (e.key === "Enter" || e.key === "Escape") { e.preventDefault(); setEditingFilename(false); updateFilename(viewedTranscription.id, viewedTranscription.filename); } }} /> ) : (

setEditingFilename(true)} > {viewedTranscription.filename}

)}

{viewedTranscription.date}