Spaces:

Pixeltable
/

Document-to-Audio-Synthesis

Sleeping

App Files Files Community

PierreBrunelle committed on Oct 23, 2024

Commit

d1eac4f

verified ·

1 Parent(s): c12c045

Delete interface.py

Browse files

Files changed (1) hide show

interface.py +0 -139

interface.py DELETED Viewed

@@ -1,139 +0,0 @@
-import gradio as gr
-from .processor import process_document
-def create_interface():
-    with gr.Blocks(theme=gr.themes.Base()) as demo:
-        gr.HTML(
-            """
-            <div style="margin-bottom: 1rem;">
-                <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png"
-                     alt="Pixeltable" style="max-width: 150px;" />
-                <h1>Document to Audio Synthesis</h1>
-            </div>
-            """
-        )
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion("What does it do?", open=True):
-                    gr.Markdown("""
-                        - PDF document processing and text extraction
-                        - Intelligent content transformation and summarization
-                        - High-quality audio synthesis with voice selection
-                        - Configurable processing parameters
-                        - Downloadable audio output
-                    """)
-            with gr.Column():
-                with gr.Accordion("How does it work?", open=True):
-                    gr.Markdown("""
-                        1. **Document Processing**
-                           - Chunks document using token-based segmentation
-                           - Maintains document structure and context
-                        2. **Content Processing**
-                           - Transforms text using LLM optimization
-                           - Generates optimized audio scripts
-                        3. **Audio Synthesis**
-                           - Converts scripts to natural speech
-                           - Multiple voice models available
-                    """)
-        with gr.Row():
-            with gr.Column():
-                api_key = gr.Textbox(
-                    label="OpenAI API Key",
-                    placeholder="sk-...",
-                    type="password"
-                )
-                file_input = gr.File(
-                    label="Input Document (PDF)",
-                    file_types=[".pdf"]
-                )
-                with gr.Accordion("Synthesis Parameters", open=True):
-                    voice_select = gr.Radio(
-                        choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
-                        value="onyx",
-                        label="Voice Model",
-                        info="TTS voice model selection"
-                    )
-                    style_select = gr.Radio(
-                        choices=["Technical", "Narrative", "Instructional", "Descriptive"],
-                        value="Technical",
-                        label="Processing Style",
-                        info="Content processing approach"
-                    )
-                with gr.Accordion("Processing Parameters", open=False):
-                    chunk_size = gr.Slider(
-                        minimum=100, maximum=1000, value=300, step=50,
-                        label="Chunk Size (tokens)",
-                        info="Text segmentation size"
-                    )
-                    temperature = gr.Slider(
-                        minimum=0, maximum=1, value=0.7, step=0.1,
-                        label="Temperature",
-                        info="LLM randomness factor"
-                    )
-                    max_tokens = gr.Slider(
-                        minimum=100, maximum=1000, value=300, step=50,
-                        label="Max Tokens",
-                        info="Maximum output token limit"
-                    )
-                process_btn = gr.Button("Process Document", variant="primary")
-                status_output = gr.Textbox(label="Status")
-        with gr.Tabs():
-            with gr.TabItem("Content Processing"):
-                output_table = gr.Dataframe(
-                    headers=["Segment", "Processed Content", "Audio Script"],
-                    wrap=True
-                )
-            with gr.TabItem("Audio Output"):
-                audio_output = gr.Audio(
-                    label="Synthesized Audio",
-                    type="filepath",
-                    show_download_button=True
-                )
-        gr.Markdown("""
-            ### Technical Notes
-            - Token limit affects processing speed and memory usage
-            - Temperature values > 0.8 may introduce content variations
-            - Audio synthesis has a 4096 token limit per segment
-            ### Performance Considerations
-            - Chunk size directly impacts processing time
-            - Higher temperatures increase LLM compute time
-            - Audio synthesis scales with script length
-        """)
-        gr.HTML(
-            """
-            <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">
-                <p style="margin: 0; color: #666; font-size: 0.8em;">
-                    Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none;">Pixeltable</a>
-                    | <a href="https://docs.pixeltable.io" target="_blank" style="color: #666;">Documentation</a>
-                    | <a href="https://huggingface.co/spaces/Pixeltable/document-to-audio-synthesis" target="_blank" style="color: #666;">Hugging Face Space</a>
-                </p>
-            </div>
-            """
-        )
-        def update_interface(pdf_file, api_key, voice, style, chunk_size, temperature, max_tokens):
-            return process_document(
-                pdf_file, api_key, voice, style, chunk_size, temperature, max_tokens
-            )
-        process_btn.click(
-            update_interface,
-            inputs=[
-                file_input, api_key, voice_select, style_select,
-                chunk_size, temperature, max_tokens
-            ],
-            outputs=[output_table, audio_output, status_output]
-        )
-    return demo