"""Gradio user interface for the document-to-audio synthesis demo."""

import gradio as gr

from .processor import process_document

# NOTE(review): both HTML payloads contain plain text only. They look like the
# surrounding markup (wrapper tags / links) was stripped at some point; the
# text is kept verbatim here -- confirm against the deployed app.
_HEADER_HTML = "\n\n📄 Document to Audio Synthesis 🎧\n\n"
_FOOTER_HTML = (
    "\n\n🚀 Powered by Pixeltable | 📚 Documentation | 🤗 Hugging Face Space\n\n"
)

# Markdown bodies are assembled from explicit lines so that every list item
# and heading sits on its own line (required for them to render as lists and
# headings) without depending on any indentation stripping by the renderer.
_FEATURES_MD = (
    "- 📄 PDF document processing and text extraction\n"
    "- 🧠 Intelligent content transformation and summarization\n"
    "- 🎧 High-quality audio synthesis with voice selection\n"
    "- ⚙️ Configurable processing parameters\n"
    "- ⬇️ Downloadable audio output\n"
)

_WORKFLOW_MD = (
    "1. 📑 **Document Processing**\n"
    "    - 📊 Chunks document using token-based segmentation\n"
    "    - 🔄 Maintains document structure and context\n"
    "2. 🔍 **Content Processing**\n"
    "    - 🤖 Transforms text using LLM optimization\n"
    "    - 📝 Generates optimized audio scripts\n"
    "3. 🎵 **Audio Synthesis**\n"
    "    - 🗣️ Converts scripts to natural speech\n"
    "    - 🎙️ Multiple voice models available\n"
)

_NOTES_MD = (
    "### 📚 Technical Notes\n"
    "- ⚡ Token limit affects processing speed and memory usage\n"
    "- 🎯 Temperature values > 0.8 may introduce content variations\n"
    "- 🔊 Audio synthesis has a 4096 token limit per segment\n"
    "\n"
    "### ⚙️ Performance Considerations\n"
    "- 📊 Chunk size directly impacts processing time\n"
    "- 🔄 Higher temperatures increase LLM compute time\n"
    "- ⏱️ Audio synthesis scales with script length\n"
)


def create_interface() -> gr.Blocks:
    """Build the Gradio Blocks app for the document-to-audio demo.

    The layout consists of a header, two explanatory accordions, an input
    column (API key, PDF upload, synthesis and processing parameters, the
    process button and a status box), result tabs for the processed content
    table and the synthesized audio, technical notes, and a footer.

    Clicking the process button forwards the current input values to
    ``process_document`` and writes its result to the content table, the
    audio player and the status box (in that order).

    Returns:
        The assembled ``gr.Blocks`` instance. It is not launched here.
    """
    with gr.Blocks(theme=gr.themes.Base()) as demo:
        gr.HTML(_HEADER_HTML)

        # Introductory panels, side by side.
        with gr.Row():
            with gr.Column():
                with gr.Accordion("🎯 What does it do?", open=True):
                    gr.Markdown(_FEATURES_MD)

            with gr.Column():
                with gr.Accordion("⚡ How does it work?", open=True):
                    gr.Markdown(_WORKFLOW_MD)

        # NOTE(review): the nesting below (button/status inside the input
        # column, tabs as the second child of the row) was reconstructed from
        # statement order only -- confirm against the intended layout.
        with gr.Row():
            with gr.Column():
                api_key = gr.Textbox(
                    label="🔑 OpenAI API Key",
                    placeholder="sk-...",
                    type="password",  # mask the key while typing
                )
                file_input = gr.File(
                    label="📁 Input Document (PDF)",
                    file_types=[".pdf"],
                )

                with gr.Accordion("🎛️ Synthesis Parameters", open=True):
                    voice_select = gr.Radio(
                        choices=[
                            "alloy",
                            "echo",
                            "fable",
                            "onyx",
                            "nova",
                            "shimmer",
                        ],
                        value="onyx",
                        label="🎙️ Voice Model",
                        info="TTS voice model selection",
                    )
                    style_select = gr.Radio(
                        choices=[
                            "Technical",
                            "Narrative",
                            "Instructional",
                            "Descriptive",
                        ],
                        value="Technical",
                        label="💫 Processing Style",
                        info="Content processing approach",
                    )

                with gr.Accordion("⚙️ Processing Parameters", open=False):
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=300,
                        step=50,
                        label="📏 Chunk Size (tokens)",
                        info="Text segmentation size",
                    )
                    temperature = gr.Slider(
                        minimum=0,
                        maximum=1,
                        value=0.7,
                        step=0.1,
                        label="🌡️ Temperature",
                        info="LLM randomness factor",
                    )
                    max_tokens = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=300,
                        step=50,
                        label="📊 Max Tokens",
                        info="Maximum output token limit",
                    )

                process_btn = gr.Button("🚀 Process Document", variant="primary")
                status_output = gr.Textbox(label="📋 Status")

            with gr.Tabs():
                with gr.TabItem("📝 Content Processing"):
                    output_table = gr.Dataframe(
                        headers=[
                            "🔍 Segment",
                            "📄 Processed Content",
                            "🎭 Audio Script",
                        ],
                        wrap=True,
                    )

                with gr.TabItem("🎧 Audio Output"):
                    audio_output = gr.Audio(
                        label="🔊 Synthesized Audio",
                        type="filepath",
                        show_download_button=True,
                    )

        gr.Markdown(_NOTES_MD)
        gr.HTML(_FOOTER_HTML)

        def update_interface(
            pdf_file, api_key, voice, style, chunk_size, temperature, max_tokens
        ):
            """Forward the current UI values to ``process_document`` unchanged.

            The return value is passed straight back to Gradio, which maps it
            onto the ``outputs`` list of the click handler below; it is
            presumably a 3-item sequence (table, audio, status) -- verify
            against ``process_document``.
            """
            return process_document(
                pdf_file,
                api_key,
                voice,
                style,
                chunk_size,
                temperature,
                max_tokens,
            )

        # Event wiring must happen inside the Blocks context.
        process_btn.click(
            update_interface,
            inputs=[
                file_input,
                api_key,
                voice_select,
                style_select,
                chunk_size,
                temperature,
                max_tokens,
            ],
            outputs=[output_table, audio_output, status_output],
        )

    return demo