PierreBrunelle committed on
Commit
d1eac4f
·
verified ·
1 Parent(s): c12c045

Delete interface.py

Browse files
Files changed (1) hide show
  1. interface.py +0 -139
interface.py DELETED
@@ -1,139 +0,0 @@
1
- import gradio as gr
2
- from .processor import process_document
3
-
4
- def create_interface():
5
- with gr.Blocks(theme=gr.themes.Base()) as demo:
6
- gr.HTML(
7
- """
8
- <div style="margin-bottom: 1rem;">
9
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png"
10
- alt="Pixeltable" style="max-width: 150px;" />
11
- <h1>Document to Audio Synthesis</h1>
12
- </div>
13
- """
14
- )
15
-
16
- with gr.Row():
17
- with gr.Column():
18
- with gr.Accordion("What does it do?", open=True):
19
- gr.Markdown("""
20
- - PDF document processing and text extraction
21
- - Intelligent content transformation and summarization
22
- - High-quality audio synthesis with voice selection
23
- - Configurable processing parameters
24
- - Downloadable audio output
25
- """)
26
- with gr.Column():
27
- with gr.Accordion("How does it work?", open=True):
28
- gr.Markdown("""
29
- 1. **Document Processing**
30
- - Chunks document using token-based segmentation
31
- - Maintains document structure and context
32
-
33
- 2. **Content Processing**
34
- - Transforms text using LLM optimization
35
- - Generates optimized audio scripts
36
-
37
- 3. **Audio Synthesis**
38
- - Converts scripts to natural speech
39
- - Multiple voice models available
40
- """)
41
-
42
- with gr.Row():
43
- with gr.Column():
44
- api_key = gr.Textbox(
45
- label="OpenAI API Key",
46
- placeholder="sk-...",
47
- type="password"
48
- )
49
- file_input = gr.File(
50
- label="Input Document (PDF)",
51
- file_types=[".pdf"]
52
- )
53
-
54
- with gr.Accordion("Synthesis Parameters", open=True):
55
- voice_select = gr.Radio(
56
- choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
57
- value="onyx",
58
- label="Voice Model",
59
- info="TTS voice model selection"
60
- )
61
- style_select = gr.Radio(
62
- choices=["Technical", "Narrative", "Instructional", "Descriptive"],
63
- value="Technical",
64
- label="Processing Style",
65
- info="Content processing approach"
66
- )
67
-
68
- with gr.Accordion("Processing Parameters", open=False):
69
- chunk_size = gr.Slider(
70
- minimum=100, maximum=1000, value=300, step=50,
71
- label="Chunk Size (tokens)",
72
- info="Text segmentation size"
73
- )
74
- temperature = gr.Slider(
75
- minimum=0, maximum=1, value=0.7, step=0.1,
76
- label="Temperature",
77
- info="LLM randomness factor"
78
- )
79
- max_tokens = gr.Slider(
80
- minimum=100, maximum=1000, value=300, step=50,
81
- label="Max Tokens",
82
- info="Maximum output token limit"
83
- )
84
-
85
- process_btn = gr.Button("Process Document", variant="primary")
86
- status_output = gr.Textbox(label="Status")
87
-
88
- with gr.Tabs():
89
- with gr.TabItem("Content Processing"):
90
- output_table = gr.Dataframe(
91
- headers=["Segment", "Processed Content", "Audio Script"],
92
- wrap=True
93
- )
94
- with gr.TabItem("Audio Output"):
95
- audio_output = gr.Audio(
96
- label="Synthesized Audio",
97
- type="filepath",
98
- show_download_button=True
99
- )
100
-
101
- gr.Markdown("""
102
- ### Technical Notes
103
- - Token limit affects processing speed and memory usage
104
- - Temperature values > 0.8 may introduce content variations
105
- - Audio synthesis has a 4096 token limit per segment
106
-
107
- ### Performance Considerations
108
- - Chunk size directly impacts processing time
109
- - Higher temperatures increase LLM compute time
110
- - Audio synthesis scales with script length
111
- """)
112
-
113
- gr.HTML(
114
- """
115
- <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">
116
- <p style="margin: 0; color: #666; font-size: 0.8em;">
117
- Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none;">Pixeltable</a>
118
- | <a href="https://docs.pixeltable.io" target="_blank" style="color: #666;">Documentation</a>
119
- | <a href="https://huggingface.co/spaces/Pixeltable/document-to-audio-synthesis" target="_blank" style="color: #666;">Hugging Face Space</a>
120
- </p>
121
- </div>
122
- """
123
- )
124
-
125
- def update_interface(pdf_file, api_key, voice, style, chunk_size, temperature, max_tokens):
126
- return process_document(
127
- pdf_file, api_key, voice, style, chunk_size, temperature, max_tokens
128
- )
129
-
130
- process_btn.click(
131
- update_interface,
132
- inputs=[
133
- file_input, api_key, voice_select, style_select,
134
- chunk_size, temperature, max_tokens
135
- ],
136
- outputs=[output_table, audio_output, status_output]
137
- )
138
-
139
- return demo