PierreBrunelle committed on
Commit
58c6bad
·
verified ·
1 Parent(s): 5a88b24

Update src/interface.py

Browse files
Files changed (1) hide show
  1. src/interface.py +43 -43
src/interface.py CHANGED
@@ -8,115 +8,115 @@ def create_interface():
8
  <div style="margin-bottom: 1rem;">
9
  <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png"
10
  alt="Pixeltable" style="max-width: 150px;" />
11
- <h1>Document to Audio Synthesis</h1>
12
  </div>
13
  """
14
  )
15
 
16
  with gr.Row():
17
  with gr.Column():
18
- with gr.Accordion("What does it do?", open=True):
19
  gr.Markdown("""
20
- - PDF document processing and text extraction
21
- - Intelligent content transformation and summarization
22
- - High-quality audio synthesis with voice selection
23
- - Configurable processing parameters
24
- - Downloadable audio output
25
  """)
26
  with gr.Column():
27
- with gr.Accordion("How does it work?", open=True):
28
  gr.Markdown("""
29
- 1. **Document Processing**
30
- - Chunks document using token-based segmentation
31
- - Maintains document structure and context
32
 
33
- 2. **Content Processing**
34
- - Transforms text using LLM optimization
35
- - Generates optimized audio scripts
36
 
37
- 3. **Audio Synthesis**
38
- - Converts scripts to natural speech
39
- - Multiple voice models available
40
  """)
41
 
42
  with gr.Row():
43
  with gr.Column():
44
  api_key = gr.Textbox(
45
- label="OpenAI API Key",
46
  placeholder="sk-...",
47
  type="password"
48
  )
49
  file_input = gr.File(
50
- label="Input Document (PDF)",
51
  file_types=[".pdf"]
52
  )
53
 
54
- with gr.Accordion("Synthesis Parameters", open=True):
55
  voice_select = gr.Radio(
56
  choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
57
  value="onyx",
58
- label="Voice Model",
59
  info="TTS voice model selection"
60
  )
61
  style_select = gr.Radio(
62
  choices=["Technical", "Narrative", "Instructional", "Descriptive"],
63
  value="Technical",
64
- label="Processing Style",
65
  info="Content processing approach"
66
  )
67
 
68
- with gr.Accordion("Processing Parameters", open=False):
69
  chunk_size = gr.Slider(
70
  minimum=100, maximum=1000, value=300, step=50,
71
- label="Chunk Size (tokens)",
72
  info="Text segmentation size"
73
  )
74
  temperature = gr.Slider(
75
  minimum=0, maximum=1, value=0.7, step=0.1,
76
- label="Temperature",
77
  info="LLM randomness factor"
78
  )
79
  max_tokens = gr.Slider(
80
  minimum=100, maximum=1000, value=300, step=50,
81
- label="Max Tokens",
82
  info="Maximum output token limit"
83
  )
84
 
85
- process_btn = gr.Button("Process Document", variant="primary")
86
- status_output = gr.Textbox(label="Status")
87
 
88
  with gr.Tabs():
89
- with gr.TabItem("Content Processing"):
90
  output_table = gr.Dataframe(
91
- headers=["Segment", "Processed Content", "Audio Script"],
92
  wrap=True
93
  )
94
- with gr.TabItem("Audio Output"):
95
  audio_output = gr.Audio(
96
- label="Synthesized Audio",
97
  type="filepath",
98
  show_download_button=True
99
  )
100
 
101
  gr.Markdown("""
102
- ### Technical Notes
103
- - Token limit affects processing speed and memory usage
104
- - Temperature values > 0.8 may introduce content variations
105
- - Audio synthesis has a 4096 token limit per segment
106
 
107
- ### Performance Considerations
108
- - Chunk size directly impacts processing time
109
- - Higher temperatures increase LLM compute time
110
- - Audio synthesis scales with script length
111
  """)
112
 
113
  gr.HTML(
114
  """
115
  <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">
116
  <p style="margin: 0; color: #666; font-size: 0.8em;">
117
- Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none;">Pixeltable</a>
118
- | <a href="https://docs.pixeltable.io" target="_blank" style="color: #666;">Documentation</a>
119
- | <a href="https://huggingface.co/spaces/Pixeltable/document-to-audio-synthesis" target="_blank" style="color: #666;">Hugging Face Space</a>
120
  </p>
121
  </div>
122
  """
 
8
  <div style="margin-bottom: 1rem;">
9
  <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png"
10
  alt="Pixeltable" style="max-width: 150px;" />
11
+ <h1>📄 Document to Audio Synthesis 🎧</h1>
12
  </div>
13
  """
14
  )
15
 
16
  with gr.Row():
17
  with gr.Column():
18
+ with gr.Accordion("🎯 What does it do?", open=True):
19
  gr.Markdown("""
20
+ - 📄 PDF document processing and text extraction
21
+ - 🧠 Intelligent content transformation and summarization
22
+ - 🎧 High-quality audio synthesis with voice selection
23
+ - ⚙️ Configurable processing parameters
24
+ - ⬇️ Downloadable audio output
25
  """)
26
  with gr.Column():
27
+ with gr.Accordion("How does it work?", open=True):
28
  gr.Markdown("""
29
+ 1. 📑 **Document Processing**
30
+ - 📊 Chunks document using token-based segmentation
31
+ - 🔄 Maintains document structure and context
32
 
33
+ 2. 🔍 **Content Processing**
34
+ - 🤖 Transforms text using LLM optimization
35
+ - 📝 Generates optimized audio scripts
36
 
37
+ 3. 🎵 **Audio Synthesis**
38
+ - 🗣️ Converts scripts to natural speech
39
+ - 🎙️ Multiple voice models available
40
  """)
41
 
42
  with gr.Row():
43
  with gr.Column():
44
  api_key = gr.Textbox(
45
+ label="🔑 OpenAI API Key",
46
  placeholder="sk-...",
47
  type="password"
48
  )
49
  file_input = gr.File(
50
+ label="📁 Input Document (PDF)",
51
  file_types=[".pdf"]
52
  )
53
 
54
+ with gr.Accordion("🎛️ Synthesis Parameters", open=True):
55
  voice_select = gr.Radio(
56
  choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
57
  value="onyx",
58
+ label="🎙️ Voice Model",
59
  info="TTS voice model selection"
60
  )
61
  style_select = gr.Radio(
62
  choices=["Technical", "Narrative", "Instructional", "Descriptive"],
63
  value="Technical",
64
+ label="💫 Processing Style",
65
  info="Content processing approach"
66
  )
67
 
68
+ with gr.Accordion("⚙️ Processing Parameters", open=False):
69
  chunk_size = gr.Slider(
70
  minimum=100, maximum=1000, value=300, step=50,
71
+ label="📏 Chunk Size (tokens)",
72
  info="Text segmentation size"
73
  )
74
  temperature = gr.Slider(
75
  minimum=0, maximum=1, value=0.7, step=0.1,
76
+ label="🌡️ Temperature",
77
  info="LLM randomness factor"
78
  )
79
  max_tokens = gr.Slider(
80
  minimum=100, maximum=1000, value=300, step=50,
81
+ label="📊 Max Tokens",
82
  info="Maximum output token limit"
83
  )
84
 
85
+ process_btn = gr.Button("🚀 Process Document", variant="primary")
86
+ status_output = gr.Textbox(label="📋 Status")
87
 
88
  with gr.Tabs():
89
+ with gr.TabItem("📝 Content Processing"):
90
  output_table = gr.Dataframe(
91
+ headers=["🔍 Segment", "📄 Processed Content", "🎭 Audio Script"],
92
  wrap=True
93
  )
94
+ with gr.TabItem("🎧 Audio Output"):
95
  audio_output = gr.Audio(
96
+ label="🔊 Synthesized Audio",
97
  type="filepath",
98
  show_download_button=True
99
  )
100
 
101
  gr.Markdown("""
102
+ ### 📚 Technical Notes
103
+ - Token limit affects processing speed and memory usage
104
+ - 🎯 Temperature values > 0.8 may introduce content variations
105
+ - 🔊 Audio synthesis has a 4096 token limit per segment
106
 
107
+ ### ⚙️ Performance Considerations
108
+ - 📊 Chunk size directly impacts processing time
109
+ - 🔄 Higher temperatures increase LLM compute time
110
+ - ⏱️ Audio synthesis scales with script length
111
  """)
112
 
113
  gr.HTML(
114
  """
115
  <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">
116
  <p style="margin: 0; color: #666; font-size: 0.8em;">
117
+ 🚀 Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none;">Pixeltable</a>
118
+ | 📚 <a href="https://docs.pixeltable.io" target="_blank" style="color: #666;">Documentation</a>
119
+ | 🤗 <a href="https://huggingface.co/spaces/Pixeltable/document-to-audio-synthesis" target="_blank" style="color: #666;">Hugging Face Space</a>
120
  </p>
121
  </div>
122
  """