<!-- sing-evaluation / index.html
soiz1's picture
Update index.html
ea5fc76 verified -->
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>歌唱類似度評価システム</title>
<script src="https://cdn.jsdelivr.net/npm/essentia.js/dist/essentia-wasm.web.js"></script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}
.container {
display: flex;
flex-direction: column;
gap: 20px;
}
.control-panel {
display: flex;
flex-direction: column;
gap: 15px;
padding: 20px;
border: 1px solid #ddd;
border-radius: 8px;
background-color: #f9f9f9;
}
button {
padding: 10px 15px;
background-color: #4CAF50;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
button:disabled {
background-color: #cccccc;
cursor: not-allowed;
}
.mode-selector {
display: flex;
gap: 15px;
margin-bottom: 10px;
}
.mode-option {
display: flex;
align-items: center;
gap: 5px;
}
.result {
margin-top: 20px;
padding: 15px;
border: 1px solid #ddd;
border-radius: 8px;
background-color: #f0f8ff;
display: none;
}
.progress-bar {
width: 100%;
background-color: #e0e0e0;
border-radius: 4px;
margin: 10px 0;
}
.progress {
height: 20px;
border-radius: 4px;
background-color: #4CAF50;
width: 0%;
transition: width 0.3s;
}
.audio-container {
display: flex;
justify-content: space-between;
gap: 20px;
margin-top: 20px;
}
.audio-box {
flex: 1;
padding: 10px;
border: 1px solid #ddd;
border-radius: 8px;
}
h2 {
margin-top: 0;
color: #333;
}
</style>
</head>
<body>
<div class="container">
<h1>歌唱類似度評価システム</h1>
<div class="control-panel">
<h2>手本音声選択</h2>
<select id="sampleSelect">
<option value="sample1.mp3">サンプル曲1</option>
<option value="sample2.mp3">サンプル曲2</option>
<option value="sample3.mp3">サンプル曲3</option>
</select>
<h2>録音モード選択</h2>
<div class="mode-selector">
<div class="mode-option">
<input type="radio" id="micMode" name="recordingMode" value="mic" checked>
<label for="micMode">マイクで録音</label>
</div>
<div class="mode-option">
<input type="radio" id="uploadMode" name="recordingMode" value="upload">
<label for="uploadMode">ファイルをアップロード</label>
</div>
</div>
<div id="uploadContainer" style="display: none;">
<input type="file" id="audioUpload" accept="audio/*">
</div>
<button id="startButton">スタート</button>
<button id="stopButton" disabled>ストップ</button>
<div class="progress-bar">
<div class="progress" id="progressBar"></div>
</div>
</div>
<div class="result" id="resultContainer">
<h2>結果</h2>
<p>類似度スコア: <span id="score">0</span>%</p>
<div class="audio-container">
<div class="audio-box">
<h3>手本音声</h3>
<audio controls id="referenceAudio"></audio>
</div>
<div class="audio-box">
<h3>あなたの歌唱</h3>
<audio controls id="userAudio"></audio>
</div>
</div>
</div>
</div>
<script>
// Mutable session state shared by the handlers below.
let audioContext;
let mediaRecorder;
let recordedChunks = [];

// Controls in the control panel.
const sampleSelect = document.getElementById('sampleSelect');
const micModeRadio = document.getElementById('micMode');
const uploadModeRadio = document.getElementById('uploadMode');
const uploadContainer = document.getElementById('uploadContainer');
const audioUpload = document.getElementById('audioUpload');
const startButton = document.getElementById('startButton');
const stopButton = document.getElementById('stopButton');
const progressBar = document.getElementById('progressBar');

// Elements of the result panel.
const resultContainer = document.getElementById('resultContainer');
const scoreElement = document.getElementById('score');
const referenceAudio = document.getElementById('referenceAudio');
const userAudio = document.getElementById('userAudio');
// Recording-mode switch: the file picker is shown only while the upload
// radio is the selected one.
for (const [modeRadio, displayValue] of [
  [micModeRadio, 'none'],
  [uploadModeRadio, 'block'],
]) {
  modeRadio.addEventListener('change', () => {
    uploadContainer.style.display = displayValue;
  });
}
// Ends the session when the reference track finishes playing.
// This is a named function on purpose: addEventListener discards a listener
// that is already registered with the same callback, so registering it on
// every start click no longer stacks one extra listener per session.
function handleReferenceEnded() {
  // The reference track can also reach its end while no session is active
  // (for example when it is replayed from the results panel). The stop button
  // is only enabled during a session, so use it as the guard.
  if (stopButton.disabled) {
    return;
  }
  stopRecording();
}

// Start button: validates the input, starts microphone recording when in
// microphone mode, and plays the selected reference track.
startButton.addEventListener('click', async () => {
  try {
    startButton.disabled = true;
    stopButton.disabled = false;
    resultContainer.style.display = 'none';
    progressBar.style.width = '0%';

    // Upload mode needs a file before anything else happens. Restore both
    // buttons so the stop button is not left enabled without a session.
    if (!micModeRadio.checked && !audioUpload.files[0]) {
      alert('音声ファイルを選択してください');
      startButton.disabled = false;
      stopButton.disabled = true;
      return;
    }

    // Load the selected reference track.
    referenceAudio.src = sampleSelect.value;

    if (micModeRadio.checked) {
      await startMicrophoneRecording();
    }

    // Register the listeners before playback starts so no event is missed.
    referenceAudio.addEventListener('timeupdate', updateProgressBar);
    referenceAudio.addEventListener('ended', handleReferenceEnded);

    // play() returns a promise that rejects when playback cannot start
    // (blocked autoplay, missing file); awaiting it routes that to the catch.
    await referenceAudio.play();
  } catch (error) {
    console.error('Error:', error);
    alert(`エラーが発生しました: ${error.message}`);

    // Undo whatever part of the session was already set up.
    referenceAudio.removeEventListener('timeupdate', updateProgressBar);
    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
      mediaRecorder.stop();
      mediaRecorder.stream.getTracks().forEach((track) => track.stop());
    }
    startButton.disabled = false;
    stopButton.disabled = true;
  }
});
// Stop button: ends the current session on demand.
stopButton.addEventListener('click', function handleStopClick() {
  stopRecording();
});
// Starts recording from the microphone.
// Resets recordedChunks, asks for microphone access, makes sure the shared
// audioContext exists, and starts a new MediaRecorder whose data chunks are
// appended to recordedChunks.
// Rejects when microphone capture is unavailable or access is denied.
async function startMicrophoneRecording() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    // Fail with a readable message instead of a TypeError on undefined.
    throw new Error('このブラウザではマイク録音を利用できません');
  }

  recordedChunks = [];
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

  // Reuse one context for the whole page: a new context per session was
  // never closed, and browsers limit how many can exist at once.
  if (!audioContext) {
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
  }

  mediaRecorder = new MediaRecorder(stream);
  mediaRecorder.ondataavailable = (event) => {
    // Skip empty chunks.
    if (event.data.size > 0) {
      recordedChunks.push(event.data);
    }
  };
  mediaRecorder.start();
}
// Ends the current session.
// Microphone mode with an active recorder: stops the recorder and lets its
// stop event trigger processRecordedAudio. Upload mode: processes the chosen
// file. Otherwise: only restores the buttons.
function stopRecording() {
  // Stop driving the progress bar.
  referenceAudio.removeEventListener('timeupdate', updateProgressBar);
  // Halt the reference track as well; left playing, it would later fire
  // "ended" and re-enter this function after the session is already over.
  referenceAudio.pause();

  if (micModeRadio.checked && mediaRecorder && mediaRecorder.state !== 'inactive') {
    // Attach the handler before stopping so it is in place when the stop
    // event is dispatched.
    mediaRecorder.onstop = processRecordedAudio;
    mediaRecorder.stop();
    // Release the microphone; otherwise the capture stays open after the
    // recording has finished.
    mediaRecorder.stream.getTracks().forEach((track) => track.stop());
  } else if (uploadModeRadio.checked) {
    processUploadedAudio();
  } else {
    startButton.disabled = false;
    stopButton.disabled = true;
  }
}
// Builds a Blob from the recorded chunks, exposes it in the "your singing"
// player, and starts the similarity analysis against the reference track.
function processRecordedAudio() {
  // Label the Blob with the container the recorder actually produced instead
  // of a hard-coded WAV type. An empty type leaves detection to the browser.
  const recordedType = (mediaRecorder && mediaRecorder.mimeType) || '';
  const audioBlob = new Blob(recordedChunks, { type: recordedType });

  // Free the object URL of the previous take before replacing it.
  const previousUrl = userAudio.src;
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  userAudio.src = URL.createObjectURL(audioBlob);

  // Run the analysis.
  analyzeAudio(referenceAudio.src, audioBlob);
}
// Exposes the uploaded file in the "your singing" player and starts the
// similarity analysis against the reference track.
// When no file is selected, only the buttons are restored.
function processUploadedAudio() {
  const file = audioUpload.files[0];
  if (!file) {
    // Without this guard URL.createObjectURL(undefined) throws and the
    // buttons are left in their in-session state.
    startButton.disabled = false;
    stopButton.disabled = true;
    return;
  }

  // Free the object URL of the previous take before replacing it.
  const previousUrl = userAudio.src;
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  userAudio.src = URL.createObjectURL(file);

  // Run the analysis.
  analyzeAudio(referenceAudio.src, file);
}
// Mirrors the playback position of the reference track in the progress bar.
function updateProgressBar() {
  const { currentTime, duration } = referenceAudio;
  // duration is NaN until metadata is available and can be Infinity; both
  // would produce an unusable width, so keep the current one.
  if (!Number.isFinite(duration) || duration <= 0) {
    return;
  }
  const percent = Math.min(Math.max((currentTime / duration) * 100, 0), 100);
  progressBar.style.width = `${percent}%`;
}
// Compares the user's audio with the reference track and shows the score.
//   referenceSrc  - URL of the reference track; it is fetched and decoded.
//   userAudioData - the user's audio, handed to readAudioData (a Blob/File
//                   from recording or upload).
// Errors are logged, reported with alert(), and not rethrown. The buttons are
// restored and the Essentia instance is released on every path.
async function analyzeAudio(referenceSrc, userAudioData) {
  let essentia = null;
  try {
    // Upload mode never runs startMicrophoneRecording, so the shared context
    // may not exist yet; decoding below needs one.
    if (!audioContext) {
      audioContext = new (window.AudioContext || window.webkitAudioContext)();
    }

    // Initialize Essentia.js.
    // NOTE(review): this initialization sequence is kept from the original
    // code; confirm it against the essentia.js documentation.
    const Essentia = await EssentiaWASM.Essentia();
    essentia = new Essentia(Essentia.FFTW);

    // Load both recordings in parallel.
    const [referenceArrayBuffer, userArrayBuffer] = await Promise.all([
      fetch(referenceSrc).then((res) => {
        // Decoding an HTTP error page would only fail later with a vaguer
        // message, so reject here.
        if (!res.ok) {
          throw new Error(`手本音声の読み込みに失敗しました (HTTP ${res.status})`);
        }
        return res.arrayBuffer();
      }),
      readAudioData(userAudioData),
    ]);
    const referenceAudioBuffer = await decodeAudioData(audioContext, referenceArrayBuffer);
    const userAudioBuffer = await decodeAudioData(audioContext, userArrayBuffer);

    // Downmix to a single channel.
    const referenceMono = convertToMono(referenceAudioBuffer);
    const userMono = convertToMono(userAudioBuffer);

    // Extract and normalize the features.
    const referenceFeatures = extractFeatures(essentia, referenceMono);
    const userFeatures = extractFeatures(essentia, userMono);
    normalizeFeatures(referenceFeatures);
    normalizeFeatures(userFeatures);

    // The two recordings almost never have the same number of frames, and
    // calculateSimilarity returns 0 for vectors of different length, so
    // compare only the frames both recordings have.
    const sharedFrames = Math.min(referenceFeatures.mfcc.length, userFeatures.mfcc.length);
    const similarityScore = calculateSimilarity(
      referenceFeatures.mfcc.slice(0, sharedFrames).flat(),
      userFeatures.mfcc.slice(0, sharedFrames).flat(),
    );

    // Convert to an integer percentage clamped to 0-100 and show the result.
    const displayScore = Math.min(Math.max(Math.round(similarityScore * 100), 0), 100);
    scoreElement.textContent = displayScore;
    resultContainer.style.display = 'block';
  } catch (error) {
    console.error('分析エラー:', error);
    alert('音声分析中にエラーが発生しました: ' + error.message);
  } finally {
    // Release the Essentia instance on the error path as well.
    if (essentia) {
      essentia.delete();
    }
    startButton.disabled = false;
    stopButton.disabled = true;
  }
}
// Helper functions

// Resolves to the bytes of the given audio data: a Blob is read into an
// ArrayBuffer through a Response, any other value is passed through as is.
async function readAudioData(audioData) {
  const isBlob = audioData instanceof Blob;
  return isBlob ? new Response(audioData).arrayBuffer() : audioData;
}
// Promise wrapper around the callback form of context.decodeAudioData:
// resolves with whatever the success callback receives and rejects with
// whatever the error callback receives.
async function decodeAudioData(context, arrayBuffer) {
  const decoded = await new Promise((onDecoded, onFailed) => {
    context.decodeAudioData(arrayBuffer, onDecoded, onFailed);
  });
  return decoded;
}
// Returns one channel of samples for the given buffer.
// A single-channel buffer yields its channel data unchanged (same array);
// otherwise the result is a new Float32Array holding the per-sample average
// of channels 0 and 1. Channels beyond the second are not read.
function convertToMono(audioBuffer) {
  const channelCount = audioBuffer.numberOfChannels;
  if (channelCount === 1) {
    return audioBuffer.getChannelData(0);
  }

  const primary = audioBuffer.getChannelData(0);
  const secondary = channelCount > 1 ? audioBuffer.getChannelData(1) : primary;
  return Float32Array.from(primary, (sample, index) => (sample + secondary[index]) / 2);
}
// Splits audioData into frames and collects per-frame features.
// Returns { mfcc: [...], spectral: { centroid: [...], rolloff: [...], flux: [...] } }
// with one entry per frame in each list.
// NOTE(review): the algorithm names, argument order and result shapes below
// are taken as-is from the original code; confirm them against the
// essentia.js API reference (in particular whether Windowing/Spectrum
// results can be passed on directly and whether MFCC yields a plain array).
function extractFeatures(essentia, audioData) {
  const FRAME_SIZE = 2048;
  const HOP_SIZE = 1024;
  const ROLLOFF_CUTOFF = 0.85;

  const mfcc = [];
  const centroid = [];
  const rolloff = [];
  const flux = [];

  const frames = essentia.FrameGenerator(audioData, FRAME_SIZE, HOP_SIZE);
  let frameIndex = 0;
  while (frameIndex < frames.size()) {
    const frame = frames.get(frameIndex);

    // Shared front end: window the frame, then take its spectrum.
    const spectrum = essentia.Spectrum(
      essentia.Windowing(frame, 'hann', FRAME_SIZE, false)
    );

    // Keep only the coefficient part of the MFCC result.
    mfcc.push(essentia.MFCC(spectrum).mfcc);

    // Spectral descriptors of the same spectrum.
    centroid.push(essentia.SpectralCentroid(spectrum).centroid);
    rolloff.push(essentia.SpectralRollOff(spectrum, ROLLOFF_CUTOFF).rollOff);
    flux.push(essentia.Flux(spectrum).flux);

    // Release the frame once its features are collected.
    frame.delete();
    frameIndex += 1;
  }
  frames.delete();

  return { mfcc, spectral: { centroid, rolloff, flux } };
}
// Rescales the features in place to the 0-1 range (min-max normalization).
// features.mfcc (a list of per-frame coefficient lists) is scaled with one
// global minimum/maximum; each list in features.spectral (centroid, rolloff,
// flux) is scaled with its own. When all values are equal they become 0.
function normalizeFeatures(features) {
  // Maps a value into 0-1. A zero range would divide by zero and fill the
  // data with NaN, so constant input maps to 0.
  const rescale = (value, low, span) => (span > 0 ? (value - low) / span : 0);

  // Plain loops instead of Math.min(...values): spreading one argument per
  // sample exceeds the engine's argument limit on long recordings.
  let mfccLow = Infinity;
  let mfccHigh = -Infinity;
  for (const coefficients of features.mfcc) {
    for (let j = 0; j < coefficients.length; j++) {
      const value = coefficients[j];
      if (value < mfccLow) mfccLow = value;
      if (value > mfccHigh) mfccHigh = value;
    }
  }
  const mfccSpan = mfccHigh - mfccLow;
  for (const coefficients of features.mfcc) {
    for (let j = 0; j < coefficients.length; j++) {
      coefficients[j] = rescale(coefficients[j], mfccLow, mfccSpan);
    }
  }

  // Spectral descriptors: each series is normalized independently.
  for (const key of ['centroid', 'rolloff', 'flux']) {
    const series = features.spectral[key];
    let low = Infinity;
    let high = -Infinity;
    for (let i = 0; i < series.length; i++) {
      if (series[i] < low) low = series[i];
      if (series[i] > high) high = series[i];
    }
    const span = high - low;
    for (let i = 0; i < series.length; i++) {
      series[i] = rescale(series[i], low, span);
    }
  }
}
// Cosine similarity of two numeric vectors.
// Returns 0 when the vectors are empty, differ in length, or when either
// has zero magnitude.
function calculateSimilarity(vec1, vec2) {
  const size = vec1.length;
  if (size === 0 || size !== vec2.length) {
    return 0;
  }

  let dot = 0;
  let squares1 = 0;
  let squares2 = 0;
  for (const [index, a] of vec1.entries()) {
    const b = vec2[index];
    dot += a * b;
    squares1 += a * a;
    squares2 += b * b;
  }

  const magnitude1 = Math.sqrt(squares1);
  const magnitude2 = Math.sqrt(squares2);
  const hasZeroVector = magnitude1 === 0 || magnitude2 === 0;
  return hasZeroVector ? 0 : dot / (magnitude1 * magnitude2);
}
</script>
</body>
</html>