|
|
|
|
|
import os |
|
import re |
|
import numpy as np |
|
from pydub import AudioSegment |
|
from fastapi import FastAPI, UploadFile, File |
|
from fastapi.responses import JSONResponse |
|
from huggingface_hub import login |
|
from hazm import Normalizer |
|
import nemo.collections.asr as nemo_asr |
|
import uvicorn |
|
|
|
|
|
# Hugging Face authentication: the token is read from the environment and
# the module refuses to import without it, so a missing token fails fast
# at startup instead of later during the model download.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError(
        "HF_TOKEN environment variable not set. Please provide a valid Hugging Face token."
    )

login(token=HF_TOKEN)
|
|
|
|
|
# Module-level singletons, created once at import time and shared by every
# request handled by this process.

# Pretrained NeMo ASR checkpoint, fetched by name.
asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
    "faimlab/stt_fa_fastconformer_hybrid_large_dataset_v30"
)

# hazm text normalizer applied to the final transcription.
normalizer = Normalizer()

# FastAPI application object that the route decorators below attach to.
app = FastAPI()
|
|
|
|
|
def load_audio(audio_file_path):
    """Decode an audio file into a mono, 16 kHz, peak-normalised waveform.

    Args:
        audio_file_path: Source passed straight to ``AudioSegment.from_file``.

    Returns:
        A ``(samples, frame_rate)`` tuple. ``samples`` is a float32 NumPy
        array scaled so that its largest absolute value is 1.0; an empty or
        completely silent recording is returned unscaled (all zeros / empty)
        instead of producing NaNs. ``frame_rate`` is the segment's frame
        rate after resampling (16000).
    """
    audio = AudioSegment.from_file(audio_file_path)
    audio = audio.set_channels(1).set_frame_rate(16000)

    audio_samples = np.array(audio.get_array_of_samples(), dtype=np.float32)

    # np.max raises on an empty array, and dividing by a zero peak would
    # turn a silent clip into NaNs, so only rescale when there is signal.
    peak = np.max(np.abs(audio_samples)) if audio_samples.size else 0.0
    if peak > 0:
        audio_samples /= peak

    return audio_samples, audio.frame_rate
|
|
|
|
|
def transcribe_chunk(audio_chunk, model):
    """Transcribe a single audio chunk and return its text.

    Args:
        audio_chunk: One chunk of audio samples; it is wrapped in a
            one-element list and handed to ``model.transcribe``.
        model: Object exposing ``transcribe(list, batch_size=..., verbose=...)``.

    Returns:
        The transcription text of the chunk as a ``str``.
    """
    results = model.transcribe([audio_chunk], batch_size=1, verbose=False)

    # Some NeMo releases return a (best_hypotheses, all_hypotheses) tuple
    # for RNNT/hybrid models; the best hypotheses are the first element.
    if isinstance(results, tuple):
        results = results[0]

    first = results[0]
    # Depending on the NeMo version an entry is either a plain string or a
    # hypothesis object carrying the text in its ``.text`` attribute.
    if isinstance(first, str):
        return first
    return first.text
|
|
|
|
|
def transcribe_audio(file_path, model, chunk_size=30 * 16000):
    """Transcribe a whole audio file by processing it in fixed-size chunks.

    Args:
        file_path: Audio file to load via ``load_audio``.
        model: ASR model forwarded to ``transcribe_chunk``.
        chunk_size: Number of samples per chunk. The default of 480000 is
            30 seconds at the 16 kHz rate ``load_audio`` resamples to.

    Returns:
        The chunk transcriptions joined with single spaces, with runs of
        spaces collapsed, then passed through the module-level ``normalizer``.
    """
    samples, _ = load_audio(file_path)
    total = len(samples)

    # Slicing clamps at the end of the array, so the last chunk is simply
    # shorter when the length is not a multiple of chunk_size.
    pieces = [
        transcribe_chunk(samples[offset:offset + chunk_size], model)
        for offset in range(0, total, chunk_size)
    ]

    text = re.sub(' +', ' ', ' '.join(pieces))
    return normalizer.normalize(text)
|
|
|
|
|
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return its transcription.

    The upload is spooled to a uniquely named temporary file, transcribed
    with the module-level ``asr_model`` and the temporary file is removed
    afterwards, whether or not transcription succeeded.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        ``{"transcription": <text>}`` on success, or a 500 ``JSONResponse``
        of the form ``{"error": <message>}`` if anything fails.
    """
    # Imported here so the block does not depend on a change to the
    # file-level import list.
    import tempfile

    temp_path = None
    try:
        # The client-supplied filename is untrusted: using it as a path
        # allows traversal ("../../etc/...") and lets concurrent uploads
        # with the same name overwrite each other. Keep only its extension
        # and let tempfile pick a unique name.
        suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            temp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): this call is synchronous, so the event loop is
        # occupied for the duration of the transcription.
        result = transcribe_audio(temp_path, asr_model)
        return {"transcription": result}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always delete the spooled upload; cleanup is best-effort and must
        # not mask the real response.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
|
|
|
|