Spaces:

UnSinnlos
/

122_

Runtime error

File size: 2,948 Bytes

1125bf9

import gradio as gr
from transformers import pipeline
import PyPDF2
import ebooklib
from ebooklib import epub
import re
import tempfile
import os
from pydub import AudioSegment

def read_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    The file is opened in binary mode and handed to ``PyPDF2.PdfReader``;
    the extracted text of each page is followed by a newline.
    """
    with open(file_path, 'rb') as pdf_handle:
        pdf = PyPDF2.PdfReader(pdf_handle)
        # Extraction happens while the file handle is still open.
        page_texts = [page.extract_text() + "\n" for page in pdf.pages]
    return "".join(page_texts)

def read_epub(file_path):
    """Return the plain text of all document items in an EPUB file.

    Markup is removed from each document item with a simple tag-stripping
    regex, then HTML character references (``&amp;``, ``&nbsp;``, ...) are
    decoded so they are not passed on as literal text.

    Args:
        file_path: Path of the EPUB file to read.

    Returns:
        The text of every document item, each followed by a newline.
    """
    import html  # local import: only needed for entity decoding

    book = epub.read_epub(file_path)
    parts = []
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        stripped = re.sub('<[^<]+?>', '', doc.get_content().decode())
        # Decode entities only AFTER stripping tags, so an escaped "&lt;b&gt;"
        # in the text is not mistaken for markup and removed.
        parts.append(html.unescape(stripped) + "\n")
    return "".join(parts)

def split_text_into_chunks(text, max_tokens=500):
    """Split *text* into chunks of at most *max_tokens* words.

    The text is split into sentences after ``.``, ``!`` or ``?`` followed by
    spaces, and sentences are packed greedily into chunks. "Tokens" here are
    whitespace-separated words, not model tokens.

    A sentence that alone exceeds the limit is broken up on word boundaries
    so that no chunk is ever longer than the limit, and empty chunks are
    never produced.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; values below 1 are
            treated as 1.

    Returns:
        A list of non-empty chunk strings, in document order.
    """
    limit = max(1, int(max_tokens))
    sentences = re.split(r'(?<=[.!?]) +', text)

    chunks = []
    current_parts = []  # sentences collected for the chunk being built
    current_words = 0   # running word count, avoids re-splitting the chunk

    for sentence in sentences:
        words = sentence.split()
        if not words:
            # Whitespace-only fragment (e.g. trailing newline): nothing to say.
            continue

        if current_words + len(words) > limit:
            # Close the running chunk, but never emit an empty one.
            if current_parts:
                chunks.append(" ".join(current_parts).strip())
            current_parts, current_words = [], 0

            if len(words) > limit:
                # Oversized sentence: emit full-size word groups and keep the
                # remainder as the start of the next chunk.
                while len(words) > limit:
                    chunks.append(" ".join(words[:limit]))
                    words = words[limit:]
                sentence = " ".join(words)

        current_parts.append(sentence)
        current_words += len(words)

    if current_parts:
        chunks.append(" ".join(current_parts).strip())
    return chunks

def tts_orpheus(text_chunks, token):
    """Synthesize speech for each text chunk and merge the results into one MP3.

    Args:
        text_chunks: Iterable of text strings; blank entries are skipped.
        token: Hugging Face access token used to load the model. An empty
            value is passed on as ``None``.

    Returns:
        Path of a temporary ``.mp3`` file containing the concatenated audio.
        The file is created with ``delete=False``, so the caller owns it.
    """
    import numpy as np  # local import: only needed for waveform conversion

    pipe = pipeline(
        "text-to-speech",
        model="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1",
        token=token or None,  # `use_auth_token` is deprecated in transformers
    )
    combined_audio = AudioSegment.silent(duration=0)

    for chunk in text_chunks:
        if not chunk or not chunk.strip():
            continue
        output = pipe(chunk, forward_params={"speaker_id": 0})

        # The pipeline returns the waveform as an array plus its sampling
        # rate, not a file path, so build the segment from raw PCM data.
        samples = np.atleast_1d(np.squeeze(np.asarray(output["audio"], dtype=np.float32)))
        if samples.ndim == 1:
            channels = 1
        else:
            # assumes layout (channels, samples) — TODO confirm for this model
            channels = samples.shape[0]
            samples = samples.T  # interleave channels frame by frame
        # assumes float samples in [-1, 1] — TODO confirm for this model
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype(np.int16)
        audio_seg = AudioSegment(
            data=pcm.tobytes(),
            sample_width=2,
            frame_rate=int(output["sampling_rate"]),
            channels=channels,
        )
        combined_audio += audio_seg

    # Reserve a path, then export after the handle is closed so the exporter
    # can reopen the file on platforms that forbid a second open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        mp3_path = tmp_file.name
    combined_audio.export(mp3_path, format="mp3")
    return mp3_path

def process_file(file, token, max_tokens):
    """Convert an uploaded EPUB or PDF into a single MP3 file.

    Args:
        file: The upload from the UI. May be ``None`` (nothing uploaded), a
            path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute holds the path.
        token: Hugging Face access token forwarded to the TTS step.
        max_tokens: Maximum number of words per text chunk.

    Returns:
        A ``(status_message, audio_path)`` tuple; ``audio_path`` is ``None``
        whenever no audio was produced.
    """
    if file is None:
        # Button pressed without an upload: report it instead of crashing.
        return "Bitte zuerst eine Datei hochladen", None

    # Accept both a plain path and a file wrapper exposing `.name`.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name

    ext = os.path.splitext(file_path)[-1].lower()
    if ext == ".pdf":
        text = read_pdf(file_path)
    elif ext == ".epub":
        text = read_epub(file_path)
    else:
        return "Ungültiges Dateiformat", None

    chunks = split_text_into_chunks(text, max_tokens=int(max_tokens))
    if not chunks:
        # E.g. an image-only PDF: there is nothing to synthesize.
        return "Kein Text gefunden", None

    audio_path = tts_orpheus(chunks, token)
    return "Fertig!", audio_path

# Input components are defined up front and placed into the layout below.
token_input = gr.Textbox(label="Hugging Face Token", type="password")
file_input = gr.File(label="EPUB oder PDF hochladen")
max_tokens_input = gr.Slider(100, 500, value=500, step=50, label="Maximale Tokens pro Chunk")

with gr.Blocks() as demo:
    gr.Markdown("## Kartoffel Orpheus TTS - EPUB/PDF zu Audio")
    with gr.Row():
        with gr.Column():
            # Components created outside the Blocks context are not part of
            # the layout until .render() is called on them; a plain
            # assignment would leave them detached from `demo`.
            token_box = token_input.render()
            file_box = file_input.render()
            token_limit_box = max_tokens_input.render()
            start_btn = gr.Button("Starten")
        with gr.Column():
            status = gr.Textbox(label="Status")
            audio_out = gr.Audio(label="Ergebnis MP3", type="filepath")

    # Input order must match the parameter order of process_file.
    start_btn.click(fn=process_file, inputs=[file_box, token_box, token_limit_box], outputs=[status, audio_out])

if __name__ == "__main__":
    demo.launch()