PierreBrunelle's picture
Create processor.py
7024de8 verified
raw
history blame
4.26 kB
import pixeltable as pxt
from pixeltable.iterators import DocumentSplitter
from pixeltable.functions import openai
import os
import requests
import tempfile
import gradio as gr
def process_document(pdf_file, api_key, voice_choice, style_choice, chunk_size, temperature, max_tokens, progress=gr.Progress()):
    """Turn an uploaded PDF into summarized text segments plus synthesized audio.

    Pipeline (all state lives in a throwaway Pixeltable directory):
      1. Recreate the `document_audio` directory and a `documents` table.
      2. Insert the PDF and split it into token-limited chunks (view).
      3. Computed columns: condense each chunk, then rewrite it as an
         audio-narration script, both via OpenAI chat completions.
      4. A `@pxt.udf` calls the OpenAI TTS REST endpoint per chunk and
         stores the resulting mp3 path in an `audio` column.

    Args:
        pdf_file: Uploaded file object; only `.name` (on-disk path) is used.
        api_key: OpenAI API key. Exported to OPENAI_API_KEY for pixeltable's
            openai functions and used directly in the TTS request header.
        voice_choice: OpenAI TTS voice name passed to the `tts-1` model.
        style_choice: Narration style interpolated into the script prompt.
        chunk_size: Token limit per chunk for DocumentSplitter.
        temperature: Sampling temperature for both chat-completion calls.
        max_tokens: Max completion tokens for both chat-completion calls.
        progress: Gradio progress reporter (default constructs one).

    Returns:
        (display_data, audio_path, status) where display_data is a list of
        [segment label, content, script] rows and audio_path is the mp3 path
        of the LAST chunk; on any failure, (None, None, "Error: ...").
    """
    try:
        os.environ['OPENAI_API_KEY'] = api_key

        progress(0.1, desc="Initializing...")
        # Start from a clean slate on every run: drop any previous state.
        pxt.drop_dir('document_audio', force=True)
        pxt.create_dir('document_audio')

        docs = pxt.create_table(
            'document_audio.documents',
            {
                'document': pxt.Document,
                'voice': pxt.String,
                'style': pxt.String
            }
        )

        progress(0.2, desc="Processing document...")
        docs.insert([{'document': pdf_file.name, 'voice': voice_choice, 'style': style_choice}])

        # View that splits the document into token-limited chunks; each row
        # exposes the chunk text as `chunks.text`.
        chunks = pxt.create_view(
            'document_audio.chunks',
            docs,
            iterator=DocumentSplitter.create(
                document=docs.document,
                separators='token_limit',
                limit=chunk_size
            )
        )

        progress(0.4, desc="Text processing...")
        # Computed column: condense each raw chunk into structured content.
        chunks['content_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': """Transform this text segment into clear, concise content.
Structure:
1. Core concepts and points
2. Supporting details
3. Key takeaways"""
                },
                {'role': 'user', 'content': chunks.text}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['content'] = chunks.content_response['choices'][0]['message']['content']

        progress(0.6, desc="Script generation...")
        # Computed column: rewrite the condensed content as a narration script
        # in the requested style.
        chunks['script_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': f"""Convert content to audio script.
Style: {docs.style}
Format:
- Clear sentence structures
- Natural pauses (...)
- Term definitions when needed
- Proper transitions"""
                },
                {'role': 'user', 'content': chunks.content}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['script'] = chunks.script_response['choices'][0]['message']['content']

        progress(0.8, desc="Audio synthesis...")

        @pxt.udf(return_type=pxt.Audio)
        def generate_audio(script: str, voice: str):
            """Synthesize one chunk's script with OpenAI TTS; return the mp3 path.

            Returns None (best-effort) on empty input, non-200 response, or
            any request error, so a single failed chunk does not abort the run.
            """
            if not script or not voice:
                return None
            try:
                response = requests.post(
                    "https://api.openai.com/v1/audio/speech",
                    headers={"Authorization": f"Bearer {api_key}"},
                    json={"model": "tts-1", "input": script, "voice": voice},
                    # Without a timeout a stuck request would hang the whole
                    # pipeline; TTS for a chunk should finish well within this.
                    timeout=120
                )
                if response.status_code == 200:
                    # delete=False: the file must outlive this call so
                    # pixeltable can ingest it from the returned path.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                        temp_file.write(response.content)
                    return temp_file.name
            except Exception as e:
                print(f"Error in audio synthesis: {e}")
            return None

        chunks['audio'] = generate_audio(chunks.script, docs.voice)

        # Only the last chunk's audio is surfaced in the UI player.
        audio_path = chunks.select(chunks.audio).tail(1)['audio'][0]

        results = chunks.select(
            chunks.content,
            chunks.script
        ).collect()
        display_data = [
            [f"Segment {idx + 1}", row['content'], row['script']]
            for idx, row in enumerate(results)
        ]

        progress(1.0, desc="Complete")
        return display_data, audio_path, "Processing complete"
    except Exception as e:
        # Top-level boundary: report any failure back to the Gradio UI as a
        # status string instead of crashing the app.
        return None, None, f"Error: {str(e)}"