Spaces:

UnSinnlos
/

122_

Runtime error

App Files Files Community

122_ / app.py

UnSinnlos

Update app.py

1125bf9 verified 8 days ago

raw

history blame contribute delete

2.95 kB

	import gradio as gr
	from transformers import pipeline
	import PyPDF2
	import ebooklib
	from ebooklib import epub
	import re
	import tempfile
	import os
	from pydub import AudioSegment

	def read_pdf(file_path):
	    """Return the concatenated text of all pages in a PDF file.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        The extracted text of every page, each page followed by a newline.
	    """
	    with open(file_path, 'rb') as pdf_file:
	        reader = PyPDF2.PdfReader(pdf_file)
	        # Pages are extracted while the file handle is still open.
	        return "".join(page.extract_text() + "\n" for page in reader.pages)

	def read_epub(file_path):
	    """Return the plain text of all document items in an EPUB file.

	    Args:
	        file_path: Path of the EPUB file to read.

	    Returns:
	        The content of every ``ITEM_DOCUMENT`` item with markup tags
	        removed by a regular expression, each item followed by a newline.
	    """
	    book = epub.read_epub(file_path)
	    parts = []
	    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
	        markup = item.get_content().decode()
	        # Drop anything that looks like a tag; entities are left untouched.
	        parts.append(re.sub('<[^<]+?>', '', markup) + "\n")
	    return "".join(parts)

	def split_text_into_chunks(text, max_tokens=500):
	    """Group the sentences of *text* into chunks of limited word count.

	    The text is split after ``.``, ``!`` or ``?`` followed by spaces, and
	    sentences are accumulated until adding the next one would push the
	    chunk past ``max_tokens`` whitespace-separated words.

	    Args:
	        text: The text to split.
	        max_tokens: Maximum number of whitespace-separated words per chunk.
	            A single sentence longer than this is kept whole in its own
	            chunk rather than being cut.

	    Returns:
	        A list of non-empty, stripped chunk strings (empty for blank text).
	    """
	    sentences = re.split(r'(?<=[.!?]) +', text)
	    chunks = []
	    pending = []       # sentences collected for the chunk being built
	    pending_words = 0  # running word count, avoids re-splitting the chunk

	    def flush():
	        chunk = " ".join(pending).strip()
	        # Skip empty flushes: an over-long sentence arriving while nothing
	        # is pending must not produce an empty chunk.
	        if chunk:
	            chunks.append(chunk)

	    for sentence in sentences:
	        sentence_words = len(sentence.split())
	        if pending_words + sentence_words > max_tokens:
	            flush()
	            pending = [sentence]
	            pending_words = sentence_words
	        else:
	            pending.append(sentence)
	            pending_words += sentence_words

	    flush()
	    return chunks

	def _to_audio_segment(output):
	    """Convert one text-to-speech pipeline result into an ``AudioSegment``."""
	    audio = output["audio"]
	    if isinstance(audio, (str, os.PathLike)):
	        # Kept for compatibility with a pipeline that hands back a WAV path.
	        return AudioSegment.from_file(audio, format="wav")

	    # Local import: numpy is only needed for the waveform conversion.
	    import numpy as np

	    # NOTE(review): assumes a mono float waveform in [-1, 1], possibly with
	    # singleton batch/channel axes — confirm against the model's output.
	    samples = np.asarray(audio, dtype=np.float32).squeeze().reshape(-1)
	    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")
	    return AudioSegment(
	        data=pcm.tobytes(),
	        sample_width=2,
	        frame_rate=int(output["sampling_rate"]),
	        channels=1,
	    )

	def tts_orpheus(text_chunks, token):
	    """Synthesize speech for every chunk and write the result as one MP3.

	    Args:
	        text_chunks: Iterable of text strings to synthesize in order.
	        token: Hugging Face access token passed to the pipeline loader.

	    Returns:
	        Path of a temporary ``.mp3`` file holding the concatenated audio.
	        The file is not deleted automatically.
	    """
	    # NOTE(review): recent transformers releases prefer `token=` over
	    # `use_auth_token=`; left unchanged because the installed version is
	    # not visible from here.
	    pipe = pipeline("text-to-speech", model="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1", use_auth_token=token)
	    combined_audio = AudioSegment.silent(duration=0)

	    for chunk in text_chunks:
	        output = pipe(chunk, forward_params={"speaker_id": 0})
	        combined_audio += _to_audio_segment(output)

	    # Reserve a file name and close the handle before exporting, so the
	    # encoder does not write to a file that is still open here.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	        mp3_path = tmp_file.name
	    combined_audio.export(mp3_path, format="mp3")
	    return mp3_path

	def process_file(file, token, max_tokens):
	    """Convert an uploaded EPUB or PDF into an MP3 and report the status.

	    Args:
	        file: The uploaded file, either a path string or an object whose
	            ``name`` attribute holds the path; ``None`` if nothing was
	            uploaded.
	        token: Hugging Face access token forwarded to ``tts_orpheus``.
	        max_tokens: Maximum number of words per text chunk.

	    Returns:
	        A ``(status message, audio path)`` tuple; the audio path is ``None``
	        whenever no audio was produced.
	    """
	    if file is None:
	        return "Bitte zuerst eine Datei hochladen", None

	    # The upload may arrive as a plain path or as a file-like wrapper.
	    file_path = file if isinstance(file, str) else file.name
	    ext = os.path.splitext(file_path)[-1].lower()
	    if ext == ".pdf":
	        text = read_pdf(file_path)
	    elif ext == ".epub":
	        text = read_epub(file_path)
	    else:
	        return "Ungültiges Dateiformat", None

	    # The slider value may be a float; the chunker compares word counts.
	    chunks = split_text_into_chunks(text, max_tokens=int(max_tokens))
	    if not chunks:
	        # E.g. a scanned PDF without a text layer: nothing to synthesize.
	        return "Kein Text gefunden", None

	    audio_path = tts_orpheus(chunks, token)
	    return "Fertig!", audio_path

	# Input components are created up front and rendered inside the layout below.
	token_input = gr.Textbox(label="Hugging Face Token", type="password")
	file_input = gr.File(label="EPUB oder PDF hochladen")
	max_tokens_input = gr.Slider(100, 500, value=500, step=50, label="Maximale Tokens pro Chunk")

	with gr.Blocks() as demo:
	    gr.Markdown("## Kartoffel Orpheus TTS - EPUB/PDF zu Audio")
	    with gr.Row():
	        with gr.Column():
	            # Components built outside the Blocks context must be rendered
	            # explicitly; a plain assignment does not place them in the
	            # layout.
	            token_input.render()
	            file_input.render()
	            max_tokens_input.render()
	            token_box = token_input
	            file_box = file_input
	            token_limit_box = max_tokens_input
	            start_btn = gr.Button("Starten")
	        with gr.Column():
	            status = gr.Textbox(label="Status")
	            audio_out = gr.Audio(label="Ergebnis MP3", type="filepath")

	    # Event listeners have to be registered inside the Blocks context.
	    start_btn.click(fn=process_file, inputs=[file_box, token_box, token_limit_box], outputs=[status, audio_out])

	if __name__ == "__main__":
	    demo.launch()