|
import gradio as gr |
|
from transformers import pipeline |
|
import PyPDF2 |
|
import ebooklib |
|
from ebooklib import epub |
|
import re |
|
import tempfile |
|
import os |
|
from pydub import AudioSegment |
|
|
|
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages in document order, with a newline
        appended after each page.
    """
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # A page without a text layer (e.g. a scanned image) may yield an
        # empty or missing result; treat it as empty text rather than failing
        # on `None + "\n"`. Collecting the parts and joining once also avoids
        # repeated string reallocation on large documents.
        page_texts = [
            (page.extract_text() or "") + "\n" for page in reader.pages
        ]
    return "".join(page_texts)
|
|
|
def read_epub(file_path):
    """Extract the plain text of every document item in an EPUB file.

    Markup tags are removed with a simple regular expression and HTML
    character references (``&amp;``, ``&nbsp;``, ``&#8211;`` ...) are decoded,
    so the returned text contains the characters themselves rather than
    entity names.

    Args:
        file_path: Path of the EPUB file to read.

    Returns:
        The text of all document items in the order the book yields them,
        with a newline appended after each item.
    """
    # Imported locally so this function stays self-contained.
    import html

    book = epub.read_epub(file_path)
    parts = []
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        # A single undecodable byte should not abort the whole book.
        markup = doc.get_content().decode("utf-8", errors="replace")
        without_tags = re.sub('<[^<]+?>', '', markup)
        # Unescape only AFTER stripping tags, otherwise an escaped "&lt;b&gt;"
        # would turn into a real tag and be removed.
        parts.append(html.unescape(without_tags) + "\n")
    return "".join(parts)
|
|
|
def split_text_into_chunks(text, max_tokens=500):
    """Split text into chunks of at most ``max_tokens`` whitespace-separated words.

    The text is first split into sentences after ``.``, ``!`` or ``?`` followed
    by one or more spaces. Consecutive sentences are packed greedily into a
    chunk (joined by single spaces) as long as the word limit is respected.
    A single sentence that is longer than the limit is broken into pieces of
    at most ``max_tokens`` words, so no chunk ever exceeds the limit.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk. Non-integer numeric
            values (e.g. a float coming from a UI slider) are truncated to int.

    Returns:
        A list of non-empty chunk strings. Empty or whitespace-only input
        yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    limit = int(max_tokens)
    if limit < 1:
        raise ValueError("max_tokens must be at least 1")

    chunks = []
    pending = []        # sentences collected for the chunk being built
    pending_words = 0   # running word count of `pending`

    for sentence in re.split(r'(?<=[.!?]) +', text):
        words = sentence.split()
        if not words:
            continue

        # Close the current chunk first if this sentence would overflow it.
        # The `pending` guard prevents emitting an empty chunk when the very
        # first sentence is already too long.
        if pending and pending_words + len(words) > limit:
            chunks.append(" ".join(pending).strip())
            pending, pending_words = [], 0

        if len(words) > limit:
            # `pending` is necessarily empty here (it was flushed above), so
            # the over-long sentence can be emitted directly in word slices.
            for start in range(0, len(words), limit):
                chunks.append(" ".join(words[start:start + limit]))
            continue

        pending.append(sentence)
        pending_words += len(words)

    if pending:
        chunks.append(" ".join(pending).strip())
    return chunks
|
|
|
def _audio_segment_from_output(output):
    """Build a pydub ``AudioSegment`` from one text-to-speech pipeline result.

    Per the transformers text-to-audio pipeline documentation the result is a
    dict holding the waveform as a numpy array under ``"audio"`` and the sample
    rate under ``"sampling_rate"`` -- not a path to a WAV file.
    """
    # Imported locally so this helper stays self-contained.
    import numpy as np

    waveform = np.asarray(output["audio"], dtype=np.float32)
    if waveform.ndim == 1:
        # Normalize mono output to the documented (channels, samples) layout.
        waveform = waveform[np.newaxis, :]
    channels = waveform.shape[0]
    # Float samples are assumed to lie in [-1, 1]; clip so that out-of-range
    # values cannot wrap around when converted to 16-bit PCM.
    pcm = (np.clip(waveform, -1.0, 1.0) * 32767.0).astype("<i2")
    return AudioSegment(
        # Transposing to (samples, channels) makes tobytes() emit the
        # interleaved frame layout pydub expects.
        data=pcm.T.tobytes(),
        sample_width=2,
        frame_rate=int(output["sampling_rate"]),
        channels=channels,
    )


def tts_orpheus(text_chunks, token):
    """Synthesize speech for each text chunk and export the result as one MP3.

    Args:
        text_chunks: Iterable of text strings; blank entries are skipped.
        token: Hugging Face access token used to load the model. An empty
            value is passed on as ``None``.

    Returns:
        Path of a temporary ``.mp3`` file containing the audio of all chunks
        concatenated in order. The file is not deleted automatically.
    """
    # NOTE: the model is loaded on every call; cache the pipeline at module
    # level if this function is invoked repeatedly.
    pipe = pipeline(
        "text-to-speech",
        model="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1",
        # `token` replaces the deprecated `use_auth_token` keyword.
        token=token or None,
    )
    combined_audio = AudioSegment.silent(duration=0)

    for chunk in text_chunks:
        if not chunk or not chunk.strip():
            continue
        output = pipe(chunk, forward_params={"speaker_id": 0})
        combined_audio += _audio_segment_from_output(output)

    # Reserve a unique file name, then close the handle before exporting so
    # the encoder can open the path itself on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        mp3_path = tmp_file.name
    combined_audio.export(mp3_path, format="mp3")
    return mp3_path
|
|
|
def process_file(file, token, max_tokens):
    """Convert an uploaded EPUB or PDF file to speech.

    Args:
        file: The uploaded file as delivered by the UI. May be ``None`` when
            nothing was uploaded, an object exposing the path as ``.name``,
            or the path string itself.
        token: Hugging Face access token forwarded to ``tts_orpheus``.
        max_tokens: Maximum number of words per text chunk.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        whenever no audio was produced (no upload, unsupported extension, or
        no extractable text).
    """
    if file is None:
        return "Bitte zuerst eine Datei hochladen.", None

    # Accept both file-like wrappers (path in `.name`) and plain path strings.
    path = getattr(file, "name", file)
    ext = os.path.splitext(path)[-1].lower()
    if ext == ".pdf":
        text = read_pdf(path)
    elif ext == ".epub":
        text = read_epub(path)
    else:
        return "Ungültiges Dateiformat", None

    chunks = split_text_into_chunks(text, max_tokens=max_tokens)
    if not chunks:
        # Nothing to synthesize (e.g. a scanned PDF without a text layer);
        # avoid loading the model just to export an empty audio file.
        return "Kein Text in der Datei gefunden.", None

    audio_path = tts_orpheus(chunks, token)
    return "Fertig!", audio_path
|
|
|
# Input components are created up front and attached to the layout below.
token_input = gr.Textbox(label="Hugging Face Token", type="password")
file_input = gr.File(label="EPUB oder PDF hochladen")
max_tokens_input = gr.Slider(100, 500, value=500, step=50, label="Maximale Tokens pro Chunk")

with gr.Blocks() as demo:
    gr.Markdown("## Kartoffel Orpheus TTS - EPUB/PDF zu Audio")
    with gr.Row():
        # Left column: inputs and the start button.
        with gr.Column():
            # Components created outside the Blocks context are not part of
            # the layout until they are rendered explicitly; merely assigning
            # them to a new name here does not attach them.
            token_input.render()
            file_input.render()
            max_tokens_input.render()
            token_box = token_input
            file_box = file_input
            token_limit_box = max_tokens_input
            start_btn = gr.Button("Starten")
        # Right column: status text and the generated audio.
        with gr.Column():
            status = gr.Textbox(label="Status")
            audio_out = gr.Audio(label="Ergebnis MP3", type="filepath")

    # Event listeners must be registered inside the Blocks context.
    start_btn.click(fn=process_file, inputs=[file_box, token_box, token_limit_box], outputs=[status, audio_out])

if __name__ == "__main__":
    demo.launch()