PierreBrunelle committed on
Commit
7024de8
·
verified ·
1 Parent(s): 461f36f

Create processor.py

Browse files
Files changed (1) hide show
  1. processor.py +117 -0
processor.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pixeltable as pxt
2
+ from pixeltable.iterators import DocumentSplitter
3
+ from pixeltable.functions import openai
4
+ import os
5
+ import requests
6
+ import tempfile
7
+ import gradio as gr
8
+
9
def process_document(pdf_file, api_key, voice_choice, style_choice, chunk_size, temperature, max_tokens, progress=gr.Progress()):
    """Turn a PDF into chunked summaries, narration scripts, and TTS audio.

    Pipeline (Pixeltable): ingest the document into a freshly created table,
    split it into token-limited chunks, run two chat-completion passes over
    each chunk (summarize, then convert to an audio script), and synthesize
    speech for each script via the OpenAI text-to-speech endpoint.

    Args:
        pdf_file: Uploaded file object (Gradio); only its ``.name`` path is used.
        api_key: OpenAI API key; exported to the environment for Pixeltable's
            OpenAI functions and sent as a Bearer token for the TTS request.
        voice_choice: TTS voice name passed to the speech endpoint.
        style_choice: Narration style injected into the script-generation prompt.
        chunk_size: Token limit per document chunk.
        temperature: Sampling temperature for both chat passes.
        max_tokens: Completion token cap for both chat passes.
        progress: Gradio progress tracker. The mutable default instance is the
            Gradio-documented convention for enabling progress reporting.

    Returns:
        On success: ``(display_data, audio_path, status)`` where ``display_data``
        is a list of ``[segment label, content, script]`` rows, ``audio_path``
        is the synthesized mp3 path of the last chunk, and ``status`` is
        "Processing complete". On failure: ``(None, None, "Error: ...")``.
    """
    try:
        os.environ['OPENAI_API_KEY'] = api_key

        progress(0.1, desc="Initializing...")
        # Recreate the workspace so repeated runs start from a clean slate.
        pxt.drop_dir('document_audio', force=True)
        pxt.create_dir('document_audio')

        docs = pxt.create_table(
            'document_audio.documents',
            {
                'document': pxt.Document,
                'voice': pxt.String,
                'style': pxt.String
            }
        )

        progress(0.2, desc="Processing document...")
        docs.insert([{'document': pdf_file.name, 'voice': voice_choice, 'style': style_choice}])

        # View that splits the document into token-limited chunks.
        chunks = pxt.create_view(
            'document_audio.chunks',
            docs,
            iterator=DocumentSplitter.create(
                document=docs.document,
                separators='token_limit',
                limit=chunk_size
            )
        )

        progress(0.4, desc="Text processing...")
        # Pass 1: distill each raw chunk into structured content.
        chunks['content_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': """Transform this text segment into clear, concise content.
            Structure:
            1. Core concepts and points
            2. Supporting details
            3. Key takeaways"""
                },
                {'role': 'user', 'content': chunks.text}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['content'] = chunks.content_response['choices'][0]['message']['content']

        progress(0.6, desc="Script generation...")
        # Pass 2: rewrite the distilled content as a spoken-word script.
        chunks['script_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': f"""Convert content to audio script.
            Style: {docs.style}
            Format:
            - Clear sentence structures
            - Natural pauses (...)
            - Term definitions when needed
            - Proper transitions"""
                },
                {'role': 'user', 'content': chunks.content}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['script'] = chunks.script_response['choices'][0]['message']['content']

        progress(0.8, desc="Audio synthesis...")

        @pxt.udf(return_type=pxt.Audio)
        def generate_audio(script: str, voice: str):
            """Synthesize one script with OpenAI TTS; return an mp3 path, or None on failure."""
            if not script or not voice:
                return None
            try:
                response = requests.post(
                    "https://api.openai.com/v1/audio/speech",
                    headers={"Authorization": f"Bearer {api_key}"},
                    json={"model": "tts-1", "input": script, "voice": voice},
                    # Fix: without a timeout a stalled request hangs the pipeline forever.
                    timeout=60,
                )
                if response.status_code == 200:
                    # delete=False: the file must outlive this call so Pixeltable
                    # can register it as the column value.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                        temp_file.write(response.content)
                        return temp_file.name
                # Fix: previously non-200 responses were silently dropped.
                print(f"Error in audio synthesis: HTTP {response.status_code}")
            except Exception as e:
                print(f"Error in audio synthesis: {e}")
            return None

        chunks['audio'] = generate_audio(chunks.script, docs.voice)

        # Path of the last chunk's audio file, for the Gradio audio widget.
        audio_path = chunks.select(chunks.audio).tail(1)['audio'][0]

        results = chunks.select(
            chunks.content,
            chunks.script
        ).collect()

        display_data = [
            [f"Segment {idx + 1}", row['content'], row['script']]
            for idx, row in enumerate(results)
        ]

        progress(1.0, desc="Complete")
        return display_data, audio_path, "Processing complete"

    except Exception as e:
        # Surface any failure to the UI as a status string rather than crashing.
        return None, None, f"Error: {str(e)}"