# Spaces: yunuseduran/speechtotext (Sleeping)
# File size: 3,686 Bytes
# 2df36c8

import asyncio
import os
import re
import shutil
import tempfile
import time
import uuid

import gradio as gr
import torch
import whisper

# Load the Whisper model once at import time. The model size comes from the
# MODEL_SIZE environment variable and falls back to "small".
MODEL_SIZE = os.environ.get("MODEL_SIZE", "small")
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Kullanılan cihaz: {device}")  # Report which device was selected
model = whisper.load_model(MODEL_SIZE)
model = model.to(device)

# Upload limits
MAX_FILE_SIZE_MB = int(os.environ.get("MAX_FILE_SIZE_MB", 25))  # 25 MB unless overridden
ALLOWED_FORMATS = {"mp3", "wav", "m4a", "ogg"}

def delete_audio_file(file_path, retries=3, wait_time=1):
    """Delete an audio file, retrying when the removal fails.

    Args:
        file_path: Path of the file to delete. Falsy values and paths that do
            not exist are treated as "nothing to delete".
        retries: Maximum number of removal attempts.
        wait_time: Seconds to sleep between two failed attempts. The sleep is
            blocking (``time.sleep``).

    Returns:
        True if this call removed the file; False if there was nothing to
        remove or every attempt failed.
    """
    # Nothing to delete: return right away instead of looping through
    # every retry without doing any work.
    if not file_path or not os.path.exists(file_path):
        return False

    for attempt in range(retries):
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # The file vanished between the existence check and the removal;
            # retrying cannot change that.
            return False
        except OSError as e:
            print(f"Dosya silme başarısız (Deneme {attempt + 1}/{retries}): {file_path} - {e}")
            # Give whatever is holding the file a chance to release it, but do
            # not sleep after the final attempt.
            if attempt + 1 < retries:
                time.sleep(wait_time)
        else:
            print(f"Dosya başarıyla silindi: {file_path}")
            return True
    return False

# Whitespace that directly follows sentence-ending punctuation. Requiring the
# whitespace keeps decimals ("3.5") and runs such as "..." in one piece.
_SENTENCE_BREAK = re.compile(r"(?<=[.?!…])\s+")


def format_text(text: str) -> str:
    """Put each sentence of *text* on its own line.

    A sentence ends at ".", "?", "!" or "…" followed by whitespace (or at an
    existing line break). Each resulting line is stripped of surrounding
    whitespace and empty lines are dropped.

    Args:
        text: The text to reformat.

    Returns:
        The sentences joined with newline characters; an empty string when
        *text* contains no non-whitespace content.
    """
    sentences = []
    for line in text.split("\n"):
        for piece in _SENTENCE_BREAK.split(line):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return "\n".join(sentences)

async def process_and_transcribe(audio_path):
    """Validate an uploaded audio file and transcribe it.

    The file is checked for existence, extension (against ALLOWED_FORMATS)
    and size (against MAX_FILE_SIZE_MB). It is then copied to a uniquely
    named file in the system temp directory, transcribed, and the copy is
    removed again.

    Args:
        audio_path: Path of the uploaded/recorded audio file, or a falsy
            value when nothing was provided.

    Returns:
        A ``(transcript, status_message)`` tuple. ``transcript`` is an empty
        string whenever validation or transcription fails.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "", "❌ Ses dosyası yüklenmedi."

    file_extension = os.path.splitext(audio_path)[-1].lower().lstrip(".")
    if file_extension not in ALLOWED_FORMATS:
        # sorted() keeps the message stable; set iteration order is arbitrary.
        return "", f"❌ Desteklenen formatlar: {', '.join(sorted(ALLOWED_FORMATS))} (Yüklenen: {file_extension})."

    temp_audio_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.{file_extension}")

    try:
        # Check the size on the source file so oversized uploads are rejected
        # before any copy is made.
        file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            return "", f"❌ Dosya boyutu {MAX_FILE_SIZE_MB}MB'ı geçemez (Yüklenen: {file_size_mb:.2f}MB)."

        # Copy inside the try block so a failed copy is reported through the
        # status message, and off the event loop because it is blocking I/O.
        await asyncio.to_thread(shutil.copy, audio_path, temp_audio_path)

        text = await transcribe_audio(temp_audio_path)
        return format_text(text), "✅ Transkript başarıyla oluşturuldu."

    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return "", f"⚠️ Hata oluştu: {e}"

    finally:
        # Single cleanup point; harmless when the copy was never created.
        delete_audio_file(temp_audio_path)

async def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    The blocking ``model.transcribe`` call is handed to
    ``asyncio.to_thread`` so it can be awaited.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    decode_options = {
        "language": "tr",
        "temperature": 0.0,
        "beam_size": 5,
        "fp16": False,  # Kept off to avoid errors when running on CPU
    }
    transcription = await asyncio.to_thread(model.transcribe, audio_path, **decode_options)
    return transcription["text"]

# Gradio UI: an audio input, a button that runs process_and_transcribe, and
# two text boxes for the transcript and the status message.
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 Türkçe Ses Transkript Aracı")
    # Show the configured limit instead of a hard-coded number so the help
    # text stays correct when MAX_FILE_SIZE_MB is overridden.
    gr.Markdown(f"""

         **Türkçe sesleri yazıya döken araç**. **Maksimum {MAX_FILE_SIZE_MB}MB** boyutunda ses dosyaları yükleyebilirsiniz.

         """)

    with gr.Row():
        audio_input = gr.Audio(
            label="Ses dosyasını yükleyin veya kaydedin",
            type="filepath",  # the callback receives a file path
            autoplay=False,
        )

    transcribe_btn = gr.Button("Transkript oluştur")
    status_text = gr.Textbox(label="Durum", interactive=False)

    with gr.Row():
        output_text = gr.Textbox(label="Transkript Sonucu")

    # process_and_transcribe returns (transcript, status), matching the
    # order of the outputs list.
    transcribe_btn.click(
        fn=process_and_transcribe,
        inputs=[audio_input],
        outputs=[output_text, status_text],
    )

demo.launch()