File size: 5,765 Bytes
0e467b4 4b8259d 0e467b4 4b8259d 0f2e342 0e467b4 4b8259d 0f2e342 4b8259d 95eb12c 4b8259d 95eb12c 0e467b4 0f2e342 4b8259d 0e467b4 556e3aa 4b8259d ec5ffa1 4b8259d 0e467b4 4b8259d 0e467b4 4b8259d 0e467b4 4b8259d 0e467b4 d954236 4b8259d d954236 3bc7caf 4b8259d 556e3aa 4b8259d d954236 4b8259d d954236 4b8259d 3bc7caf 4b8259d ec5ffa1 4b8259d 3bc7caf 4b8259d 3bc7caf 4b8259d d954236 4b8259d 556e3aa ec5ffa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import json
import os
import tempfile
import uuid
import zipfile
from datetime import datetime, timedelta

import firebase_admin
import gradio as gr
import librosa
import torch
from firebase_admin import credentials, firestore, storage
from transformers import Wav2Vec2Processor, AutoModelForCTC
# NOTE(review): `tmpdir` is not referenced by any code visible in this file;
# it looks like a leftover placeholder — confirm before removing.
tmpdir = None
def transcribe(audio_file):
    """Transcribe an audio file with the module-level ASR model.

    The audio is loaded with ``librosa`` at 16 kHz, run through the
    module-level ``processor`` and ``model``, and decoded by taking the
    argmax over the model's logits.

    Args:
        audio_file: Path to the audio file. May be ``None`` or empty when no
            audio is currently selected.

    Returns:
        The decoded transcription with every ``[UNK]`` token removed; an
        empty string when no audio file was supplied; or a message starting
        with ``處理文件錯誤`` describing the failure if processing raised.
    """
    # No audio selected (e.g. the input was cleared): return a blank
    # transcript instead of surfacing a loader error as the transcription.
    if not audio_file:
        return ""
    try:
        # The sampling rate returned by librosa is always the requested 16 kHz.
        audio, _ = librosa.load(audio_file, sr=16000)
        input_values = processor(
            audio, sampling_rate=16000, return_tensors="pt"
        ).input_values
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription.replace("[UNK]", "")
    except Exception as e:
        # Best-effort UI handler: report the failure as text rather than crash.
        return f"處理文件錯誤: {e}"
# Initialize Firebase
# The service-account credentials are read as a JSON string from the
# `firebase_creds` environment variable. Fail early with an explicit message
# when it is missing, instead of the opaque TypeError json.loads(None) raises.
_firebase_creds_json = os.environ.get('firebase_creds')
if not _firebase_creds_json:
    raise RuntimeError(
        "Environment variable 'firebase_creds' is not set; it must contain "
        "the Firebase service-account credentials as a JSON string."
    )
firebase_config = json.loads(_firebase_creds_json)
cred = credentials.Certificate(firebase_config)
firebase_admin.initialize_app(cred, {
    "storageBucket": "amis-asr-corrections-dem-8cf3d.firebasestorage.app"
})
db = firestore.client()      # Firestore client used to store correction records
bucket = storage.bucket()    # default storage bucket configured above

# Load ASR model and processor
MODEL_NAME = "eleferrand/XLSR_paiwan"
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = AutoModelForCTC.from_pretrained(MODEL_NAME)
def transcribe_both(audio_file):
    """Run ``transcribe`` once and return its result twice.

    The two identical values fill the read-only "original" text box and the
    editable "corrected" text box with the same starting text.

    Args:
        audio_file: Path to the audio file, forwarded to ``transcribe``.

    Returns:
        A 2-tuple ``(transcription, transcription)``.
    """
    return (transcribe(audio_file),) * 2
def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
    """Upload the audio (if present) and save a correction record to Firestore.

    Args:
        original_transcription: Text produced by the ASR model.
        corrected_transcription: Text after the user's edits.
        audio_file: Path to the audio file, or a falsy value when there is
            none. A path that does not exist on disk is treated as "no audio".
        age: Age value supplied by the user.
        native_speaker: Whether the user marked themselves as a native
            Paiwan speaker.

    Returns:
        ``"校正保存成功!"`` on success, otherwise a message starting with
        ``保存失败`` that includes the error text.
    """
    try:
        audio_metadata = {}
        audio_file_url = None
        if audio_file and os.path.exists(audio_file):
            # sr=None keeps the file's native sampling rate: the samples are
            # only needed to measure the duration, so resampling is wasted work.
            audio, sr = librosa.load(audio_file, sr=None)
            duration = librosa.get_duration(y=audio, sr=sr)
            file_size = os.path.getsize(audio_file)
            audio_metadata = {'duration': duration, 'file_size': file_size}

            # Random object name so uploads never overwrite each other.
            unique_id = str(uuid.uuid4())
            destination_path = f"audio/pai/{unique_id}.wav"
            blob = bucket.blob(destination_path)
            blob.upload_from_filename(audio_file)
            # NOTE(review): this signed URL expires after one hour, yet it is
            # persisted in the record below — confirm whether consumers need a
            # longer-lived reference (e.g. the blob path).
            audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))

        combined_data = {
            'transcription_info': {
                'original_text': original_transcription,
                'corrected_text': corrected_transcription,
                'language': 'pai',
            },
            'audio_data': {
                'audio_metadata': audio_metadata,
                'audio_file_url': audio_file_url,
            },
            'user_info': {'native_paiwan_speaker': native_speaker, 'age': age},
            'timestamp': datetime.now().isoformat(),
            'model_name': MODEL_NAME,
        }
        db.collection('paiwan_transcriptions').add(combined_data)
        return "校正保存成功!"
    except Exception as e:
        # Best-effort UI handler: show the failure in the status box.
        return f"保存失败: {e}"
def prepare_download(audio_file, original_transcription, corrected_transcription):
    """Bundle the audio and both transcriptions into a temporary ZIP archive.

    The archive contains ``audio.wav`` (only when ``audio_file`` exists on
    disk), ``original_transcription.txt`` and ``corrected_transcription.txt``,
    the text files being UTF-8 encoded.

    Args:
        audio_file: Path to the audio file, or ``None``.
        original_transcription: Text produced by the ASR model; ``None`` is
            written as an empty file.
        corrected_transcription: Text after the user's edits; ``None`` is
            written as an empty file.

    Returns:
        Path of the created ``.zip`` file, or ``None`` when ``audio_file`` is
        ``None``. The file is not deleted by this function.
    """
    if audio_file is None:
        return None

    # Reserve a unique path; close the handle so ZipFile can reopen it by name.
    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip.close()

    with zipfile.ZipFile(tmp_zip.name, "w") as zf:
        if os.path.exists(audio_file):
            zf.write(audio_file, arcname="audio.wav")
        # Write the text straight into the archive. Going through scratch
        # files in the working directory would let concurrent requests
        # clobber each other's files and requires a writable CWD.
        zf.writestr(
            "original_transcription.txt",
            (original_transcription or "").encode("utf-8"),
        )
        zf.writestr(
            "corrected_transcription.txt",
            (corrected_transcription or "").encode("utf-8"),
        )
    return tmp_zip.name
# Interface
# Four-step page: upload audio -> review/edit transcript -> user info ->
# save to Firebase / download a ZIP.
with gr.Blocks() as demo:
    title = gr.Markdown("排灣語自動語音識別校正系統 (Paiwan ASR Transcription & Correction System)")

    # Step 1: audio input (file upload or microphone), passed on as a file path.
    step1 = gr.Markdown(
        "步驟 1:音訊上傳與產生逐字稿 (Audio Upload & Automatic Transcription)\n\n上傳後系統將自動產生逐字稿,請耐心等待。"
    )
    with gr.Row():
        audio_input = gr.Audio(
            label="音訊輸入 (Audio Input)",
            type="filepath",
            sources=["upload", "microphone"],
        )

    # Step 2: read-only model output next to an editable copy.
    step2 = gr.Markdown("步驟 2:審閱與編輯逐字稿 (Step 2: Review & Edit Transcription)")
    with gr.Row():
        original_text = gr.Textbox(
            lines=5,
            interactive=False,
            label="原始逐字稿 (Original Transcription)",
        )
        corrected_text = gr.Textbox(
            lines=5,
            interactive=True,
            label="更正逐字稿 (Corrected Transcription)",
        )

    # Automatically generate transcription whenever the audio input changes;
    # both text boxes receive the same text.
    audio_input.change(
        fn=transcribe_both,
        inputs=audio_input,
        outputs=[original_text, corrected_text],
        queue=True,
    )

    # Step 3: information about the contributor.
    step3 = gr.Markdown("步驟 3:使用者資訊 (Step 3: User Information)")
    with gr.Row():
        age_input = gr.Slider(
            label="年齡 (Age)",
            value=25,
            minimum=0,
            maximum=100,
            step=1,
        )
        native_speaker_input = gr.Checkbox(
            value=True,
            label="母語排灣語使用者? (Native Paiwan Speaker?)",
        )

    # Step 4: persist the correction and/or download everything as a ZIP.
    step4 = gr.Markdown("步驟 4:儲存與下載 (Step 4: Save & Download)")
    with gr.Row():
        save_button = gr.Button("儲存 (Save)")
        save_status = gr.Textbox(
            interactive=False,
            label="儲存狀態 (Save Status)",
        )
    with gr.Row():
        download_button = gr.Button("下載 ZIP 檔案 (Download ZIP File)")
        download_output = gr.File()

    # Wire the buttons to their handlers.
    save_button.click(
        fn=store_correction,
        inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
        outputs=save_status,
    )
    download_button.click(
        fn=prepare_download,
        inputs=[audio_input, original_text, corrected_text],
        outputs=download_output,
    )

demo.launch()
|