"""Gradio app that turns an uploaded EPUB or PDF into a single MP3 file.

The document text is extracted, split into word-bounded chunks, synthesized
chunk by chunk with a Hugging Face text-to-speech pipeline, and the resulting
audio segments are concatenated and exported as MP3.
"""

import functools
import html
import os
import re
import tempfile

import ebooklib
import gradio as gr
import PyPDF2
from ebooklib import epub
from pydub import AudioSegment
from transformers import pipeline

TTS_MODEL_ID = "SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"

# Matches a single markup tag such as "<p>" or "</div>".
_TAG_RE = re.compile(r"<[^<]+?>")
# Splits after sentence-ending punctuation followed by any whitespace, so a
# sentence that ends at a line or page break is split as well.
_SENTENCE_END_RE = re.compile(r"(?<=[.!?])\s+")


def read_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    Each page's text is followed by a newline.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Defensive: treat a page that yields no text (None or "") as empty
        # instead of failing on string concatenation.
        pages = [page.extract_text() or "" for page in reader.pages]
    return "".join(f"{page_text}\n" for page_text in pages)


def read_epub(file_path):
    """Return the plain text of all document items of the EPUB at *file_path*.

    Markup tags are stripped with a regular expression and HTML entities
    (``&amp;``, ``&nbsp;`` ...) are decoded so they are not read out literally.
    Each document's text is followed by a newline.
    """
    book = epub.read_epub(file_path)
    parts = []
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        markup = doc.get_content().decode("utf-8", errors="replace")
        parts.append(html.unescape(_TAG_RE.sub("", markup)) + "\n")
    return "".join(parts)


def split_text_into_chunks(text, max_tokens=500):
    """Split *text* into chunks of at most *max_tokens* words.

    "Tokens" are whitespace-separated words, not model tokens. The text is
    split at sentence boundaries and whole sentences are packed greedily into
    chunks. A single sentence longer than *max_tokens* is kept intact as its
    own (oversized) chunk.

    Returns:
        A list of non-empty chunk strings; empty if *text* contains no words.
    """
    chunks = []
    current = []  # sentences of the chunk being assembled
    current_words = 0
    for sentence in _SENTENCE_END_RE.split(text):
        n_words = len(sentence.split())
        if n_words == 0:
            continue
        # Only flush when something has been collected; otherwise an
        # oversized first sentence would emit an empty chunk.
        if current and current_words + n_words > max_tokens:
            chunks.append(" ".join(current).strip())
            current, current_words = [], 0
        current.append(sentence)
        current_words += n_words
    if current:
        chunks.append(" ".join(current).strip())
    return chunks


@functools.lru_cache(maxsize=1)
def _load_tts_pipeline(token):
    """Load the TTS pipeline once and reuse it across requests."""
    # NOTE(review): newer transformers releases prefer `token=` over
    # `use_auth_token=`; kept as in the original for compatibility.
    return pipeline("text-to-speech", model=TTS_MODEL_ID, use_auth_token=token)


def _segment_from_tts_output(output):
    """Convert one pipeline result into a pydub ``AudioSegment``."""
    audio = output["audio"]
    if isinstance(audio, (str, os.PathLike)):
        # Some pipelines may hand back a file path; keep supporting that.
        return AudioSegment.from_file(audio, format="wav")
    # The text-to-speech pipeline returns a float waveform array plus its
    # sampling rate. Assumes mono audio in [-1, 1] -- TODO confirm for this
    # model. Convert to 16-bit PCM, which pydub can wrap directly.
    samples = audio.squeeze()
    pcm = (samples.clip(-1.0, 1.0) * 32767).astype("int16")
    return AudioSegment(
        data=pcm.tobytes(),
        sample_width=2,
        frame_rate=int(output["sampling_rate"]),
        channels=1,
    )


def tts_orpheus(text_chunks, token):
    """Synthesize *text_chunks* and return the path of the combined MP3.

    Args:
        text_chunks: Iterable of text strings; blank entries are skipped.
        token: Hugging Face access token; an empty value is passed as None.

    Returns:
        Path of a temporary ``.mp3`` file. The file is not deleted
        automatically; the caller owns it.
    """
    pipe = _load_tts_pipeline(token or None)
    combined_audio = AudioSegment.silent(duration=0)
    for chunk in text_chunks:
        if not chunk.strip():
            continue
        output = pipe(chunk, forward_params={"speaker_id": 0})
        combined_audio += _segment_from_tts_output(output)
    # Close the temp file before exporting: writing to a still-open
    # NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        mp3_path = tmp_file.name
    combined_audio.export(mp3_path, format="mp3")
    return mp3_path


def process_file(file, token, max_tokens):
    """Gradio callback: convert the uploaded document to speech.

    Args:
        file: Upload from ``gr.File``: either a path string or an object
            with a ``name`` attribute; ``None`` when nothing was uploaded.
        token: Hugging Face access token.
        max_tokens: Maximum number of words per TTS chunk.

    Returns:
        ``(status_message, mp3_path)``; ``mp3_path`` is None on failure.
    """
    if file is None:
        return "Keine Datei hochgeladen", None
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    ext = os.path.splitext(path)[-1].lower()
    readers = {".pdf": read_pdf, ".epub": read_epub}
    reader = readers.get(ext)
    if reader is None:
        return "Ungültiges Dateiformat", None
    chunks = split_text_into_chunks(reader(path), max_tokens=max_tokens)
    if not chunks:
        # Nothing to synthesize (e.g. a scanned, image-only PDF).
        return "Kein Text gefunden", None
    audio_path = tts_orpheus(chunks, token)
    return "Fertig!", audio_path


token_input = gr.Textbox(label="Hugging Face Token", type="password")
file_input = gr.File(label="EPUB oder PDF hochladen")
max_tokens_input = gr.Slider(
    100, 500, value=500, step=50, label="Maximale Tokens pro Chunk"
)

with gr.Blocks() as demo:
    gr.Markdown("## Kartoffel Orpheus TTS - EPUB/PDF zu Audio")
    with gr.Row():
        with gr.Column():
            # Components created outside the Blocks context must be
            # rendered explicitly to become part of this layout.
            token_box = token_input.render()
            file_box = file_input.render()
            token_limit_box = max_tokens_input.render()
            start_btn = gr.Button("Starten")
        with gr.Column():
            status = gr.Textbox(label="Status")
            audio_out = gr.Audio(label="Ergebnis MP3", type="filepath")
    start_btn.click(
        fn=process_file,
        inputs=[file_box, token_box, token_limit_box],
        outputs=[status, audio_out],
    )

if __name__ == "__main__":
    demo.launch()