import gradio as gr import torch import librosa from transformers import Wav2Vec2Processor, AutoModelForCTC import zipfile import os import firebase_admin from firebase_admin import credentials, firestore from datetime import datetime import json import tempfile # Initialize Firebase firebase_config = json.loads(os.environ.get('firebase_creds')) cred = credentials.Certificate(firebase_config) firebase_admin.initialize_app(cred) db = firestore.client() # Load the ASR model and processor MODEL_NAME = "eleferrand/xlsr53_Amis" processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) model = AutoModelForCTC.from_pretrained(MODEL_NAME) # Language configuration LANGUAGE = { "en": { "title": "ASR Demo with Editable Transcription", "step1": "Step 1: Audio Upload & Transcription", "audio_input": "Audio Input", "transcribe_btn": "Transcribe Audio", "step2": "Step 2: Review & Edit Transcription", "original_text": "Original Transcription", "corrected_text": "Corrected Transcription", "transcription_placeholder": "Transcription will appear here...", "step3": "Step 3: User Information", "age_label": "Age", "native_speaker": "Native Amis Speaker", "step4": "Step 4: Save & Download", "save_btn": "Save Correction to Database", "save_status": "Save Status", "download_btn": "Download Results (ZIP)", "status_placeholder": "Status messages will appear here...", "toggle_lang": "中文/English" }, "zh": { "title": "可編輯轉寫的語音辨識演示", "step1": "步驟一: 音頻上傳與轉寫", "audio_input": "音頻輸入", "transcribe_btn": "開始轉寫", "step2": "步驟二: 校對與編輯轉寫結果", "original_text": "原始轉寫結果", "corrected_text": "校正後文本", "transcription_placeholder": "轉寫結果將顯示在此處...", "step3": "步驟三: 用戶資訊", "age_label": "年齡", "native_speaker": "阿美族母語者", "step4": "步驟四: 保存與下載", "save_btn": "保存校正結果至數據庫", "save_status": "保存狀態", "download_btn": "下載結果(ZIP壓縮檔)", "status_placeholder": "狀態訊息將顯示在此處...", "toggle_lang": "English/中文" } } current_lang = gr.State(value="en") def transcribe(audio_file): try: audio, rate = librosa.load(audio_file, sr=16000) input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values with torch.no_grad(): logits = model(input_values).logits predicted_ids = torch.argmax(logits, dim=-1) transcription = processor.batch_decode(predicted_ids)[0] return transcription.replace("[UNK]", "") except Exception as e: return f"Error processing file: {e}" def transcribe_both(audio_file): start_time = datetime.now() transcription = transcribe(audio_file) processing_time = (datetime.now() - start_time).total_seconds() return transcription, transcription, processing_time def store_correction(original_transcription, corrected_transcription, audio_file, processing_time, age, native_speaker): try: audio_metadata = {} if audio_file and os.path.exists(audio_file): audio, sr = librosa.load(audio_file, sr=16000) duration = librosa.get_duration(y=audio, sr=sr) file_size = os.path.getsize(audio_file) audio_metadata = {'duration': duration, 'file_size': file_size} combined_data = { 'original_text': original_transcription, 'corrected_text': corrected_transcription, 'timestamp': datetime.now().isoformat(), 'processing_time': processing_time, 'audio_metadata': audio_metadata, 'audio_url': None, 'model_name': MODEL_NAME, 'user_info': { 'native_amis_speaker': native_speaker, 'age': age } } db.collection('transcriptions').add(combined_data) return "Correction saved successfully!" except Exception as e: return f"Error saving correction: {e}" def prepare_download(audio_file, original_transcription, corrected_transcription): if audio_file is None: return None tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip") tmp_zip.close() with zipfile.ZipFile(tmp_zip.name, "w") as zf: if os.path.exists(audio_file): zf.write(audio_file, arcname="audio.wav") orig_txt = "original_transcription.txt" with open(orig_txt, "w", encoding="utf-8") as f: f.write(original_transcription) zf.write(orig_txt, arcname="original_transcription.txt") os.remove(orig_txt) corr_txt = "corrected_transcription.txt" with open(corr_txt, "w", encoding="utf-8") as f: f.write(corrected_transcription) zf.write(corr_txt, arcname="corrected_transcription.txt") os.remove(corr_txt) return tmp_zip.name def toggle_language(lang): new_lang = "zh" if lang == "en" else "en" lang_dict = LANGUAGE[new_lang] return [ gr.Markdown.update(value=f"