|
<!DOCTYPE html> |
|
<html lang="en"> |
|
|
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>Gemini Voice Chat</title> |
|
<style> |
|
/* Design tokens shared by the rules below. */
:root {
  --color-accent: #6366f1;
  --color-background: #0f172a;
  --color-surface: #1e293b;
  --color-text: #e2e8f0;
  --boxSize: 8px;
  /* Not referenced by any rule in this stylesheet; kept for compatibility. */
  --gutter: 4px;
}

/* Page shell: everything is centred in a full-height column. */
body {
  margin: 0;
  padding: 0;
  background-color: var(--color-background);
  color: var(--color-text);
  font-family: system-ui, -apple-system, sans-serif;
  min-height: 100vh;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
}

/* Card that holds the controls, the waveform and the start button. */
.container {
  width: 90%;
  max-width: 800px;
  background-color: var(--color-surface);
  padding: 2rem;
  border-radius: 1rem;
  box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
}

/* Waveform: the script appends one .box per bar to .box-container. */
.wave-container {
  position: relative;
  display: flex;
  min-height: 100px;
  max-height: 128px;
  justify-content: center;
  align-items: center;
  margin: 2rem 0;
}

.box-container {
  display: flex;
  justify-content: space-between;
  height: 64px;
  width: 100%;
}

/* A single bar; the script animates it through an inline scaleY() transform. */
.box {
  height: 100%;
  width: var(--boxSize);
  background: var(--color-accent);
  border-radius: 8px;
  transition: transform 0.05s ease;
}

/* Form controls. */
.controls {
  display: grid;
  gap: 1rem;
  margin-bottom: 2rem;
}

.input-group {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}

label {
  font-size: 0.875rem;
  font-weight: 500;
}

select {
  padding: 0.75rem;
  border-radius: 0.5rem;
  border: 1px solid rgba(255, 255, 255, 0.1);
  background-color: var(--color-background);
  color: var(--color-text);
  font-size: 1rem;
}

button {
  padding: 1rem 2rem;
  border-radius: 0.5rem;
  border: none;
  background-color: var(--color-accent);
  color: white;
  font-weight: 600;
  cursor: pointer;
  transition: all 0.2s ease;
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 12px;
  min-width: 180px;
}

/* Only enabled buttons react to hover; the script disables the button while
   connecting, and a disabled control should not look clickable. */
button:hover:not(:disabled) {
  opacity: 0.9;
  transform: translateY(-1px);
}

button:disabled {
  cursor: not-allowed;
}

/* "Connecting..." state rendered inside the button. */
.icon-with-spinner {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 12px;
  min-width: 180px;
}

.spinner {
  width: 20px;
  height: 20px;
  border: 2px solid white;
  border-top-color: transparent;
  border-radius: 50%;
  animation: spin 1s linear infinite;
  flex-shrink: 0;
}

@keyframes spin {
  to {
    transform: rotate(360deg);
  }
}

/* "Stop Recording" state rendered inside the button. */
.pulse-container {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 12px;
}

/* The script writes --audio-level on this element to scale it with the
   microphone level; 1 is the resting size. */
.pulse-circle {
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background-color: white;
  opacity: 0.2;
  flex-shrink: 0;
  transform: scale(var(--audio-level, 1));
  transition: transform 0.1s ease;
}

/* Toast: hidden by default, the script toggles `display` and the variant class. */
.toast {
  position: fixed;
  top: 20px;
  left: 50%;
  transform: translateX(-50%);
  padding: 16px 24px;
  border-radius: 4px;
  font-size: 14px;
  z-index: 1000;
  display: none;
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
}

.toast.error {
  background-color: #f44336;
  color: white;
}

.toast.warning {
  background-color: #ffd700;
  color: black;
}

/* Microphone icon shown next to the "Stop Recording" label. */
.mute-toggle {
  width: 24px;
  height: 24px;
  cursor: pointer;
  flex-shrink: 0;
}

.mute-toggle svg {
  display: block;
}

#start-button {
  margin-left: auto;
  margin-right: auto;
}

/* API key status banner. */
.api-status {
  background-color: var(--color-background);
  padding: 1rem;
  border-radius: 0.5rem;
  margin-bottom: 1rem;
  text-align: center;
  border: 1px solid rgba(34, 197, 94, 0.3);
}

.api-status.success {
  color: #22c55e;
}

.api-status.error {
  color: #ef4444;
  border-color: rgba(239, 68, 68, 0.3);
}
|
</style> |
|
</head> |
|
|
|
<body> |
|
<!-- Toast for error and warning messages. The script fills in the text and
     toggles visibility; role="alert" lets assistive technology announce it. -->
<div id="error-toast" class="toast" role="alert"></div>

<header style="text-align: center">
  <h1>Gemini Voice Chat</h1>
  <p>Speak with Gemini using real-time audio streaming</p>
  <p>Using API key from environment variable</p>
</header>

<main class="container">
  <div class="api-status success">✓ API Key configured via environment variable</div>

  <div class="controls">
    <div class="input-group">
      <label for="voice">Voice</label>
      <select id="voice">
        <option value="Puck">Puck</option>
        <option value="Charon">Charon</option>
        <option value="Kore">Kore</option>
        <option value="Fenrir">Fenrir</option>
        <option value="Aoede">Aoede</option>
      </select>
    </div>
  </div>

  <!-- Purely decorative waveform; the script appends the bars to .box-container. -->
  <div class="wave-container" aria-hidden="true">
    <div class="box-container"></div>
  </div>

  <!-- The script replaces this button's content to reflect the connection state. -->
  <button id="start-button" type="button">Start Recording</button>
</main>

<!-- Sink for the remote audio stream; the script assigns srcObject and calls play(). -->
<audio id="audio-output"></audio>
|
|
|
<script> |
|
// ---- Connection state ------------------------------------------------------
let peerConnection, dataChannel, webrtc_id;
let isRecording = false, isMuted = false;

// ---- Web Audio state -------------------------------------------------------
// `*_input` names belong to the microphone analyser, the unsuffixed
// `analyser` / `dataArray` pair to the remote (output) audio analyser.
let audioContext;
let analyser_input, dataArray_input;
let analyser, dataArray;
let source_input = null, source_output = null;

// ---- DOM handles -----------------------------------------------------------
const [startButton, voiceSelect, audioOutput] =
  ['start-button', 'voice', 'audio-output'].map((id) => document.getElementById(id));
const boxContainer = document.querySelector('.box-container');

// ---- Waveform bars ---------------------------------------------------------
// Append `numBars` empty <div class="box"> elements to the waveform container.
const numBars = 32;
Array.from({ length: numBars }).forEach(() => {
  const bar = document.createElement('div');
  bar.classList.add('box');
  boxContainer.appendChild(bar);
});
|
|
|
|
|
// Inline SVG markup for the mute toggle. Both icons are decorative (the toggle
// element carries its own `title`), so they are hidden from assistive
// technology with aria-hidden.

// Microphone icon shown while the microphone is live.
const micIconSVG = `
  <svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true">
    <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
    <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
    <line x1="12" y1="19" x2="12" y2="23"></line>
    <line x1="8" y1="23" x2="16" y2="23"></line>
  </svg>`;

// Same microphone with a diagonal strike-through, shown while muted.
const micMutedIconSVG = `
  <svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true">
    <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
    <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
    <line x1="12" y1="19" x2="12" y2="23"></line>
    <line x1="8" y1="23" x2="16" y2="23"></line>
    <line x1="1" y1="1" x2="23" y2="23"></line>
  </svg>`;
|
|
|
/**
 * Re-render the start button so it mirrors the peer connection state:
 *   - 'new' / 'connecting' -> disabled button with spinner and "Connecting..."
 *   - 'connected'          -> "Stop Recording" with pulse circle and mute toggle
 *   - anything else / no connection -> plain "Start Recording"
 */
function updateButtonState() {
  startButton.innerHTML = '';
  startButton.onclick = null;

  // null stands for "no peer connection at all".
  const state = peerConnection ? peerConnection.connectionState : null;

  if (state === 'connecting' || state === 'new') {
    startButton.innerHTML = `
      <div class="icon-with-spinner">
        <div class="spinner"></div>
        <span>Connecting...</span>
      </div>
    `;
    startButton.disabled = true;
    return;
  }

  if (state === 'connected') {
    const pulse = Object.assign(document.createElement('div'), {
      className: 'pulse-container',
      innerHTML: `
        <div class="pulse-circle"></div>
        <span>Stop Recording</span>
      `,
    });

    const muteControl = Object.assign(document.createElement('div'), {
      className: 'mute-toggle',
      title: isMuted ? 'Unmute' : 'Mute',
      innerHTML: isMuted ? micMutedIconSVG : micIconSVG,
    });
    muteControl.addEventListener('click', toggleMute);

    startButton.append(pulse, muteControl);
    startButton.disabled = false;
    return;
  }

  startButton.innerHTML = 'Start Recording';
  startButton.disabled = false;
}
|
|
|
/**
 * Show `message` in the error toast and hide it again after five seconds.
 *
 * The pending hide timer is kept on `showError.hideTimer` and cancelled on
 * every call, so a timer started for an earlier message can no longer hide a
 * newer message before its own five seconds have elapsed.
 *
 * @param {string} message Text to display.
 */
function showError(message) {
  const toast = document.getElementById('error-toast');
  if (!toast) {
    // Without the toast element the best we can do is log the message.
    console.error(message);
    return;
  }

  toast.textContent = message;
  toast.className = 'toast error';
  toast.style.display = 'block';

  clearTimeout(showError.hideTimer);
  showError.hideTimer = setTimeout(() => {
    toast.style.display = 'none';
  }, 5000);
}
|
|
|
/**
 * Click handler of the mute toggle: flips `isMuted`, enables or disables every
 * outgoing audio track accordingly and re-renders the button.
 *
 * @param {Event} event Click event; its propagation is stopped so the
 *   surrounding start/stop button does not receive it.
 */
function toggleMute(event) {
  event.stopPropagation();

  const connected = Boolean(peerConnection) && peerConnection.connectionState === 'connected';
  if (!connected) return;

  isMuted = !isMuted;
  console.log("Mute toggled:", isMuted);

  for (const sender of peerConnection.getSenders()) {
    const track = sender.track;
    if (!track || track.kind !== 'audio') continue;
    track.enabled = !isMuted;
    console.log(`Audio track ${track.id} enabled: ${!isMuted}`);
  }

  updateButtonState();
}
|
|
|
/**
 * Open a WebRTC session with the server.
 *
 * Steps: create the peer connection, capture the microphone and attach its
 * tracks, wire an analyser to the microphone, register connection / ICE /
 * track / data-channel handlers, then POST the SDP offer to `/webrtc/offer`
 * and apply the answer.
 *
 * Any failure is reported through `showError` and the session is torn down
 * with `stopWebRTC()`, which also resets the button.
 *
 * @returns {Promise<void>} Resolves when setup finished or was aborted; never rejects.
 */
async function setupWebRTC() {
  // NOTE(review): `__RTC_CONFIGURATION__` is not defined in this file; it is
  // presumably substituted before the page is served. Confirm with the server side.
  const config = __RTC_CONFIGURATION__;

  // Keep a local handle: the handlers below must talk to *this* connection
  // even if the global is later reset by stopWebRTC() or replaced by a new session.
  const pc = new RTCPeerConnection(config);
  peerConnection = pc;
  webrtc_id = Math.random().toString(36).substring(7);

  // Warn the user if the connection has not been established after 5 seconds.
  const slowConnectionTimer = setTimeout(() => {
    const toast = document.getElementById('error-toast');
    toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
    toast.className = 'toast warning';
    toast.style.display = 'block';
    setTimeout(() => {
      toast.style.display = 'none';
    }, 5000);
  }, 5000);

  let stream = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream.getTracks().forEach(track => pc.addTrack(track, stream));

    if (!audioContext || audioContext.state === 'closed') {
      audioContext = new AudioContext();
    }

    // Microphone analyser (drives the pulse circle).
    if (source_input) {
      try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting previous input source:", e); }
      source_input = null;
    }
    source_input = audioContext.createMediaStreamSource(stream);
    analyser_input = audioContext.createAnalyser();
    source_input.connect(analyser_input);
    analyser_input.fftSize = 64;
    dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
    updateAudioLevel();

    pc.addEventListener('connectionstatechange', () => {
      // Ignore events from a connection that is no longer the current one.
      if (pc !== peerConnection) return;
      console.log('connectionstatechange', pc.connectionState);
      if (pc.connectionState === 'connected') {
        clearTimeout(slowConnectionTimer);
        const toast = document.getElementById('error-toast');
        toast.style.display = 'none';
        if (analyser_input) updateAudioLevel();
        if (analyser) updateVisualization();
      }
      updateButtonState();
    });

    pc.onicecandidate = ({ candidate }) => {
      if (!candidate) return;
      console.debug("Sending ICE candidate", candidate);
      fetch('/webrtc/offer', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          candidate: candidate.toJSON(),
          webrtc_id: webrtc_id,
          type: "ice-candidate",
        })
      }).catch(e => console.error("Failed to send ICE candidate:", e));
    };

    pc.addEventListener('track', (evt) => {
      if (evt.track.kind !== 'audio' || !audioOutput) return;
      const remoteStream = evt.streams[0];
      if (audioOutput.srcObject === remoteStream) return;

      audioOutput.srcObject = remoteStream;
      audioOutput.play().catch(e => console.error("Audio play failed:", e));

      if (!audioContext || audioContext.state === 'closed') {
        console.warn("AudioContext not ready for output track analysis.");
        return;
      }

      // Remote-audio analyser (drives the waveform bars).
      if (source_output) {
        try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting previous output source:", e); }
        source_output = null;
      }
      source_output = audioContext.createMediaStreamSource(remoteStream);
      analyser = audioContext.createAnalyser();
      source_output.connect(analyser);
      analyser.fftSize = 2048;
      dataArray = new Uint8Array(analyser.frequencyBinCount);
      updateVisualization();
    });

    dataChannel = pc.createDataChannel('text');
    dataChannel.onmessage = (event) => {
      let eventJson;
      try {
        eventJson = JSON.parse(event.data);
      } catch (e) {
        console.warn("Ignoring data channel message that is not valid JSON:", e);
        return;
      }
      if (!eventJson) return;

      if (eventJson.type === "error") {
        showError(eventJson.message);
      } else if (eventJson.type === "send_input") {
        fetch('/input_hook', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            webrtc_id: webrtc_id,
            voice_name: voiceSelect.value
          })
        }).catch(e => console.error("Failed to send input hook:", e));
      }
    };

    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);

    const response = await fetch('/webrtc/offer', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        sdp: pc.localDescription.sdp,
        type: pc.localDescription.type,
        webrtc_id: webrtc_id,
      })
    });
    const serverResponse = await response.json();

    if (serverResponse.status === 'failed') {
      // The attempt is over: make sure the "taking longer than usual" warning
      // cannot replace the error message a few seconds later.
      clearTimeout(slowConnectionTimer);
      const meta = serverResponse.meta || {};
      showError(meta.error === 'concurrency_limit_reached'
        ? `Too many connections. Maximum limit is ${meta.limit}`
        : (meta.error || 'Failed to establish connection. Please try again.'));
      stopWebRTC(); // also resets the button to "Start Recording"
      return;
    }

    await pc.setRemoteDescription(serverResponse);
  } catch (err) {
    clearTimeout(slowConnectionTimer);
    console.error('Error setting up WebRTC:', err);
    showError('Failed to establish connection. Please try again.');
    // Release the microphone even if the tracks never made it onto the
    // connection; stopping an already stopped track is harmless.
    if (stream) stream.getTracks().forEach(track => track.stop());
    stopWebRTC(); // also resets the button to "Start Recording"
  }
}
|
|
|
/**
 * Render one frame of the waveform from the remote-audio analyser and schedule
 * the next frame while the connection is 'connected' or 'connecting'.
 * Otherwise every bar is reset to its resting height and the loop ends.
 *
 * The function is started from more than one place (the track handler and the
 * 'connected' state handler). The handle of the scheduled frame is kept on
 * `updateVisualization.frameId` and cancelled on entry, so an extra call
 * restarts the loop instead of adding a second one running in parallel.
 */
function updateVisualization() {
  if (updateVisualization.frameId) {
    cancelAnimationFrame(updateVisualization.frameId);
    updateVisualization.frameId = 0;
  }

  const bars = document.querySelectorAll('.box');
  const active = analyser && dataArray && peerConnection &&
    ['connected', 'connecting'].includes(peerConnection.connectionState);

  if (!active) {
    bars.forEach(bar => { bar.style.transform = 'scaleY(0.1)'; });
    return;
  }

  analyser.getByteFrequencyData(dataArray);
  for (let i = 0; i < bars.length; i++) {
    // Byte values are 0..255; map them to a 0..2 scale with a 0.1 floor.
    // A missing bin counts as silence rather than producing scaleY(NaN).
    const barHeight = ((dataArray[i] || 0) / 255) * 2;
    bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
  }

  updateVisualization.frameId = requestAnimationFrame(updateVisualization);
}
|
|
|
/**
 * Update the pulse circle from the microphone analyser and schedule the next
 * frame while the connection is 'connected' or 'connecting'. Otherwise the
 * circle is reset to its resting size (`--audio-level: 1`) and the loop ends.
 *
 * The handle of the scheduled frame is kept on `updateAudioLevel.frameId` and
 * cancelled on entry, so calling the function again while a loop is already
 * running restarts that loop instead of stacking a second one.
 */
function updateAudioLevel() {
  if (updateAudioLevel.frameId) {
    cancelAnimationFrame(updateAudioLevel.frameId);
    updateAudioLevel.frameId = 0;
  }

  const pulseCircle = document.querySelector('.pulse-circle');
  const active = analyser_input && dataArray_input && peerConnection &&
    ['connected', 'connecting'].includes(peerConnection.connectionState);

  if (!active) {
    if (pulseCircle) {
      pulseCircle.style.setProperty('--audio-level', 1);
    }
    return;
  }

  analyser_input.getByteFrequencyData(dataArray_input);

  // Sum the bins in place; no intermediate array is allocated per frame.
  let total = 0;
  for (let i = 0; i < dataArray_input.length; i++) {
    total += dataArray_input[i];
  }
  const average = dataArray_input.length ? total / dataArray_input.length : 0;
  const audioLevel = average / 255; // normalised to 0..1

  if (pulseCircle) {
    pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
  }

  updateAudioLevel.frameId = requestAnimationFrame(updateAudioLevel);
}
|
|
|
/**
 * Tear down the current session and return the UI to its idle state.
 *
 * Stops outgoing tracks, detaches handlers, closes the data channel and the
 * peer connection, detaches the audio element, disconnects the analyser
 * sources, closes the AudioContext, clears the analyser state and mute /
 * recording flags, and finally re-renders the button and resets the waveform.
 * Safe to call when no session exists.
 */
function stopWebRTC() {
  console.log("Running stopWebRTC");

  if (peerConnection) {
    peerConnection.getSenders().forEach(sender => {
      if (sender.track) {
        sender.track.stop();
      }
    });

    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onicegatheringstatechange = null;
    peerConnection.onconnectionstatechange = null;

    if (dataChannel) {
      dataChannel.onmessage = null;
      try { dataChannel.close(); } catch (e) { console.warn("Error closing data channel:", e); }
      dataChannel = null;
    }

    try { peerConnection.close(); } catch (e) { console.warn("Error closing peer connection:", e); }
    peerConnection = null;
  }

  if (audioOutput) {
    audioOutput.pause();
    audioOutput.srcObject = null;
  }

  if (source_input) {
    try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting input source:", e); }
    source_input = null;
  }

  if (source_output) {
    try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting output source:", e); }
    source_output = null;
  }

  // Drop the global reference *before* the asynchronous close() settles.
  // Otherwise a session started right after this call could pick up the
  // context that is being closed, or have its fresh context nulled by the
  // completion callback of this close().
  const closingContext = audioContext;
  audioContext = null;
  if (closingContext && closingContext.state !== 'closed') {
    closingContext.close().then(() => {
      console.log("AudioContext closed successfully.");
    }).catch(e => {
      console.error("Error closing AudioContext:", e);
    });
  }

  analyser_input = null;
  dataArray_input = null;
  analyser = null;
  dataArray = null;

  isMuted = false;
  isRecording = false;
  updateButtonState();

  const bars = document.querySelectorAll('.box');
  bars.forEach(bar => { bar.style.transform = 'scaleY(0.1)'; });

  const pulseCircle = document.querySelector('.pulse-circle');
  if (pulseCircle) {
    pulseCircle.style.setProperty('--audio-level', 1);
  }
}
|
|
|
// Start/stop button. Clicks that originate from the mute toggle are ignored
// here because the toggle has its own handler.
startButton.addEventListener('click', (event) => {
  if (event.target.closest('.mute-toggle')) {
    return;
  }

  // null stands for "no peer connection at all".
  const state = peerConnection ? peerConnection.connectionState : null;

  if (state === 'connected') {
    console.log("Stop button clicked");
    stopWebRTC();
    return;
  }

  const canStart = state === null ||
    ['new', 'closed', 'failed', 'disconnected'].includes(state);
  if (!canStart) {
    return;
  }

  console.log("Start button clicked");

  // A connection left over in 'failed' / 'disconnected' would otherwise just
  // be overwritten by setupWebRTC(), leaving it open and its microphone
  // tracks live. Release it before starting the new session.
  if (peerConnection) {
    const stale = peerConnection;
    try {
      stale.getSenders().forEach(sender => {
        if (sender.track) {
          sender.track.stop();
        }
      });
      stale.close();
    } catch (e) {
      console.warn("Error releasing previous peer connection:", e);
    }
  }

  setupWebRTC();
  isRecording = true;
  updateButtonState();
});

// Render the initial button state on load.
updateButtonState();
|
</script> |
|
</body> |
|
|
|
</html> |