Spaces:

yunuseduran
/

speechtotext

Sleeping

App Files Files Community

speechtotext / app.py

yunuseduran

Upload 2 files

2df36c8 verified 6 months ago

raw

history blame

3.69 kB

	import gradio as gr
	import whisper
	import os
	import asyncio
	import shutil
	import tempfile
	import uuid
	import torch

	# Whisper checkpoint name; overridable through the MODEL_SIZE environment variable.
	MODEL_SIZE = os.environ.get("MODEL_SIZE", "small")

	# Run on the GPU when PyTorch reports that CUDA is available, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	print(f"Kullanılan cihaz: {device}")  # report the selected device

	# Load the checkpoint once at import time and move it to the chosen device.
	model = whisper.load_model(MODEL_SIZE)
	model = model.to(device)

	# Upload limits
	MAX_FILE_SIZE_MB = int(os.environ.get("MAX_FILE_SIZE_MB", 25))  # 25 MB unless overridden
	ALLOWED_FORMATS = {"mp3", "wav", "m4a", "ogg"}

	def delete_audio_file(file_path, retries=3, wait_time=1):
	    """Delete an audio file, retrying when the removal fails.

	    Args:
	        file_path: Path of the file to remove. ``None`` or an empty string
	            is accepted and treated as "nothing to delete".
	        retries: Maximum number of removal attempts.
	        wait_time: Seconds to sleep between two failed attempts.

	    Returns:
	        True if this call removed the file; False if there was nothing to
	        remove or every attempt failed.
	    """
	    # Local import so the function does not rely on a module-level
	    # ``import time`` being present in the file.
	    import time

	    # Nothing to delete: answer immediately instead of looping through
	    # every retry only to report False at the end.
	    if not file_path or not os.path.exists(file_path):
	        return False

	    for attempt in range(retries):
	        try:
	            os.remove(file_path)
	        except FileNotFoundError:
	            # The file vanished between attempts; there is nothing left
	            # for this call to delete.
	            return False
	        except OSError as e:
	            print(f"Dosya silme başarısız (Deneme {attempt + 1}/{retries}): {file_path} - {e}")
	            # Give whatever is holding the file a chance to release it,
	            # but do not sleep after the final attempt.
	            if attempt + 1 < retries:
	                time.sleep(wait_time)
	        else:
	            print(f"Dosya başarıyla silindi: {file_path}")
	            return True
	    return False

	def format_text(text: str) -> str:
	    """Put each sentence of *text* on its own line.

	    A sentence boundary is whitespace that follows ".", "?", "!" or "…".
	    Requiring the whitespace keeps "3.5", "..." and "?!" intact. Every
	    resulting line is stripped, and empty lines are dropped, so no line
	    starts with the space that followed the previous sentence.

	    Args:
	        text: Raw transcript text.

	    Returns:
	        The sentences joined with newline characters; an empty string when
	        *text* contains no non-whitespace content.
	    """
	    # Local import so the function does not rely on a module-level
	    # ``import re`` being present in the file.
	    import re

	    sentences = []
	    # Line breaks already present in the input remain boundaries.
	    for line in text.splitlines():
	        for piece in re.split(r"(?<=[.?!…])\s+", line):
	            piece = piece.strip()
	            if piece:
	                sentences.append(piece)
	    return "\n".join(sentences)

	async def process_and_transcribe(audio_path):
	    """Validate an uploaded audio file and transcribe it.

	    The file is checked for existence, extension and size. It is then
	    copied to a uniquely named temporary file, which is transcribed and
	    always removed afterwards.

	    Args:
	        audio_path: Filesystem path of the uploaded audio, or a falsy value
	            when nothing was uploaded.

	    Returns:
	        A ``(transcript, status_message)`` tuple. ``transcript`` is an
	        empty string whenever validation or transcription fails.
	    """
	    if not audio_path or not os.path.exists(audio_path):
	        return "", "❌ Ses dosyası yüklenmedi."

	    file_extension = os.path.splitext(audio_path)[-1].lower().lstrip(".")
	    if file_extension not in ALLOWED_FORMATS:
	        # Sorted so the message is stable; set iteration order is not.
	        return "", f"❌ Desteklenen formatlar: {', '.join(sorted(ALLOWED_FORMATS))} (Yüklenen: {file_extension})."

	    temp_audio_path = None
	    try:
	        # Check the size of the upload itself, before paying for a copy.
	        file_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
	        if file_size_mb > MAX_FILE_SIZE_MB:
	            return "", f"❌ Dosya boyutu {MAX_FILE_SIZE_MB}MB'ı geçemez (Yüklenen: {file_size_mb:.2f}MB)."

	        # The path is assigned before copying so that a partially written
	        # copy is still cleaned up in ``finally``.
	        temp_audio_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.{file_extension}")
	        shutil.copy(audio_path, temp_audio_path)

	        text = await transcribe_audio(temp_audio_path)
	        return format_text(text), "✅ Transkript başarıyla oluşturuldu."

	    except Exception as e:
	        # UI boundary: report any failure as a status message instead of
	        # letting the exception escape the handler.
	        return "", f"⚠️ Hata oluştu: {e}"

	    finally:
	        if temp_audio_path is not None:
	            delete_audio_file(temp_audio_path)

	async def transcribe_audio(audio_path):
	    """Transcribe *audio_path* with the module-level Whisper model.

	    The blocking ``model.transcribe`` call is run through
	    ``asyncio.to_thread`` and awaited.

	    Args:
	        audio_path: Path of the audio file to transcribe.

	    Returns:
	        The value stored under the ``"text"`` key of the transcription
	        result.
	    """
	    decode_options = {
	        "language": "tr",
	        "temperature": 0.0,
	        "beam_size": 5,
	        "fp16": False,  # half precision disabled to avoid errors on CPU
	    }
	    transcription = await asyncio.to_thread(
	        model.transcribe, audio_path, **decode_options
	    )
	    return transcription["text"]

	# Gradio UI: one audio input, a button that starts the transcription, a
	# read-only status box and the transcript output.
	with gr.Blocks() as demo:
	    gr.Markdown("## 🎤 Türkçe Ses Transkript Aracı")
	    # The limit shown to the user comes from MAX_FILE_SIZE_MB so the text
	    # cannot drift from the value that is actually enforced.
	    gr.Markdown(
	        f"Türkçe sesleri yazıya döken araç. Maksimum {MAX_FILE_SIZE_MB}MB "
	        "boyutunda ses dosyaları yükleyebilirsiniz."
	    )

	    with gr.Row():
	        audio_input = gr.Audio(
	            label="Ses dosyasını yükleyin veya kaydedin",
	            type="filepath",  # the handler receives a filesystem path
	            autoplay=False,
	        )

	    transcribe_btn = gr.Button("Transkript oluştur")
	    status_text = gr.Textbox(label="Durum", interactive=False)

	    with gr.Row():
	        output_text = gr.Textbox(label="Transkript Sonucu")

	    # The handler returns (transcript, status), matching the outputs order.
	    transcribe_btn.click(
	        fn=process_and_transcribe,
	        inputs=[audio_input],
	        outputs=[output_text, status_text],
	    )

	demo.launch()