Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- __init__.py +6 -0
- app.py +164 -0
- audio_processor.py +201 -0
- config.py +36 -0
- content_generator.py +147 -0
- requirements.txt +18 -0
__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Antalya Blog Post Generator package.
|
3 |
+
A specialized AI agent for generating culturally rich content about Antalya.
|
4 |
+
"""
|
5 |
+
|
6 |
+
__version__ = "0.1.0"
|
app.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
from datetime import datetime
|
4 |
+
from docx import Document
|
5 |
+
from docx.shared import Pt, RGBColor
|
6 |
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
7 |
+
from audio_processor import AudioProcessor
|
8 |
+
import config
|
9 |
+
|
10 |
+
class NewsApp:
    """Turn an uploaded audio recording into a downloadable Word document.

    Transcription and text generation are delegated to ``AudioProcessor``;
    this class stores the upload on disk, calls the processor and lays the
    generated text out with python-docx.
    """

    # python-docx only accepts heading levels 0-9. Level 0 is used for the
    # document title, so headings found in the body are capped at 9.
    _MAX_HEADING_LEVEL = 9

    def __init__(self):
        self.processor = AudioProcessor()

    def process_audio_file(self, audio_file, content_type="news", language="tr"):
        """Process an uploaded audio file and generate a Word document.

        Args:
            audio_file: Raw bytes of the upload (the UI's file input is
                configured with ``type="binary"``), or ``None`` when nothing
                was uploaded.
            content_type: ``"news"`` or ``"blog"``; forwarded to the processor.
            language: Language code forwarded to the processor.

        Returns:
            A ``(status_message, document_path)`` tuple. ``document_path`` is
            ``None`` whenever no document could be produced.
        """
        import tempfile

        if audio_file is None:
            return "Lütfen bir ses dosyası yükleyin.", None

        temp_audio_path = None
        try:
            # Log the size only: the upload is a raw byte string and printing
            # it would dump the whole recording into the log.
            print(f"Received audio file: {len(audio_file)} bytes")

            temp_dir = "temp_audio"
            os.makedirs(temp_dir, exist_ok=True)

            # mkstemp guarantees a unique name, so two uploads arriving in the
            # same second cannot overwrite each other's audio.
            fd, temp_audio_path = tempfile.mkstemp(
                prefix="temp_audio_", suffix=".m4a", dir=temp_dir
            )
            with os.fdopen(fd, "wb") as f:
                f.write(audio_file)

            print(f"Saved temporary file to: {temp_audio_path}")

            results = self.processor.process_audio(
                audio_path=temp_audio_path,
                language=language,
                content_type=content_type,
                generate_content=True,
            )

            generated = results.get("generated_content")
            if not generated:
                return "İçerik oluşturulamadı. Lütfen ses kaydını kontrol edin.", None

            doc = self._build_document(generated, results["date"])

            output_dir = "data/output"
            os.makedirs(output_dir, exist_ok=True)
            # Microseconds keep concurrent requests from sharing a file name.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            doc_path = os.path.join(output_dir, f"haber_{timestamp}.docx")
            doc.save(doc_path)

            return "İçerik başarıyla oluşturuldu!", doc_path

        except Exception as e:
            # UI boundary: report the failure in the status box instead of
            # letting the exception escape into the Gradio callback.
            return f"Hata oluştu: {str(e)}", None

        finally:
            # The temporary audio copy is only needed while processing.
            if temp_audio_path is not None:
                try:
                    os.remove(temp_audio_path)
                except OSError:
                    # Best-effort cleanup; a leftover file must not mask the
                    # real result of the request.
                    pass

    def _build_document(self, generated, date):
        """Lay out the generated title, date and body as a python-docx document."""
        doc = Document()

        # Title
        title = doc.add_heading(generated["title"], 0)
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Right-aligned, grey date line
        date_paragraph = doc.add_paragraph()
        date_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        date_run = date_paragraph.add_run(f"Tarih: {date}")
        date_run.font.size = Pt(10)
        date_run.font.color.rgb = RGBColor(128, 128, 128)

        # Separator
        doc.add_paragraph("").add_run("_" * 50)

        # Body
        for kind, level, text in self._parse_content(generated["content"]):
            if kind == "heading":
                doc.add_heading(text, level)
            else:
                doc.add_paragraph(text)

        return doc

    @staticmethod
    def _parse_content(content):
        """Split generated Markdown-like text into heading/paragraph blocks.

        Rules:
            * a blank line ends the current paragraph;
            * a line starting with ``#`` is a heading whose level is the
              number of *leading* hashes, capped at ``_MAX_HEADING_LEVEL``;
            * a line starting with ``*`` always opens a new paragraph;
            * any other line continues the current paragraph, joined to the
              previous line with a single space.

        Returns:
            A list of ``(kind, level, text)`` tuples where ``kind`` is
            ``"heading"`` or ``"paragraph"`` and ``level`` is 0 for paragraphs.
        """
        blocks = []
        pending = []  # lines of the paragraph currently being collected

        def flush():
            if pending:
                blocks.append(("paragraph", 0, " ".join(pending)))
                pending.clear()

        for line in content.split('\n'):
            if not line.strip():
                flush()
            elif line.startswith('#'):
                # A heading closes the paragraph before it, so text that
                # follows lands below the heading rather than above it.
                flush()
                hashes = len(line) - len(line.lstrip('#'))
                level = min(hashes, NewsApp._MAX_HEADING_LEVEL)
                blocks.append(("heading", level, line.strip('#').strip()))
            else:
                if line.startswith('*'):
                    flush()
                pending.append(line.strip())

        flush()
        return blocks
|
94 |
+
|
95 |
+
def create_ui():
    """Assemble the Gradio Blocks interface for the audio-to-article tool.

    The left column holds the inputs (audio upload, content type, language,
    submit button); the right column shows the status text and the generated
    file. Clicking the button calls ``NewsApp.process_audio_file``.

    Returns:
        The ``gr.Blocks`` object; launching it is left to the caller.
    """
    news_app = NewsApp()

    intro_markdown = """
# 🎙️ Ses Dosyasından Haber/Blog Oluşturma

Ses kaydınızı yükleyin, yapay zeka destekli sistemimiz sizin için profesyonel bir haber metni veya blog yazısı oluştursun.

### Nasıl Kullanılır:
1. Ses dosyanızı yükleyin (.mp3, .m4a, .wav formatları desteklenir)
2. İçerik tipini seçin (Haber/Blog)
3. Dili seçin
4. "Oluştur" butonuna tıklayın
5. Oluşturulan Word belgesini indirin

### Önemli Notlar:
- Desteklenen ses formatları: MP3, M4A, WAV
- Maksimum dosya boyutu: 25MB
- İşlem süresi dosya boyutuna göre değişebilir
- Türkçe ve İngilizce dilleri desteklenmektedir
"""

    with gr.Blocks(title="Ses Dosyasından Haber Oluşturma", theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            # Input column
            with gr.Column():
                upload = gr.File(label="Ses Dosyası", file_types=[".mp3", ".m4a", ".wav"], type="binary")
                kind_choice = gr.Radio(
                    choices=["news", "blog"],
                    value="news",
                    label="İçerik Tipi",
                    info="Oluşturulacak içeriğin türünü seçin",
                )
                language_choice = gr.Radio(
                    choices=["tr", "en"],
                    value="tr",
                    label="Dil",
                    info="İçeriğin dilini seçin",
                )
                generate_button = gr.Button("Oluştur", variant="primary")

            # Output column
            with gr.Column():
                status_box = gr.Textbox(label="Durum", interactive=False)
                result_file = gr.File(label="Oluşturulan Dosya", interactive=False)

        # The handler's two return values map onto the two output components.
        generate_button.click(
            fn=news_app.process_audio_file,
            inputs=[upload, kind_choice, language_choice],
            outputs=[status_box, result_file],
        )

    return demo
|
161 |
+
|
162 |
+
if __name__ == "__main__":
    # Script entry point: build the interface and start the Gradio server.
    create_ui().launch()
|
audio_processor.py
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Dict, Optional
|
3 |
+
from whisper import load_model # Import directly from whisper package
|
4 |
+
import librosa
|
5 |
+
import soundfile as sf
|
6 |
+
from datetime import datetime
|
7 |
+
from rich.console import Console
|
8 |
+
from rich.progress import Progress
|
9 |
+
from content_generator import ContentGenerator
|
10 |
+
import config
|
11 |
+
|
12 |
+
class AudioProcessor:
    """Transcribe audio with Whisper and turn the transcript into an article.

    Transcription uses the locally loaded Whisper ``tiny`` model; article
    generation is delegated to ``ContentGenerator``.
    """

    # Context prompt handed to Whisper, keyed by language code. Only Turkish
    # has one; other languages get no prompt, so a Turkish sentence is never
    # used to prime the transcription of non-Turkish audio.
    _INITIAL_PROMPTS = {"tr": "Bu bir haber metnidir."}

    def __init__(self):
        self.console = Console()
        try:
            # Use tiny model instead of base for faster processing
            self.model = load_model("tiny")
            self.console.print("[green]Successfully loaded Whisper model (tiny)[/green]")
        except Exception as e:
            self.console.print(f"[red]Error loading Whisper model:[/red] {str(e)}")
            raise
        self.content_generator = ContentGenerator()

    def process_audio(
        self,
        audio_path: str,
        language: str = config.DEFAULT_LANGUAGE,
        content_type: str = "news",  # "news" or "blog"
        generate_content: bool = True
    ) -> Dict:
        """
        Process audio file: transcribe and optionally generate content.

        Args:
            audio_path (str): Path to the audio file
            language (str): Language code for transcription and content generation
            content_type (str): Type of content to generate ("news" or "blog")
            generate_content (bool): Whether to generate content from transcript

        Returns:
            Dict: ``transcript``, ``language``, ``date`` (YYYY-MM-DD),
            ``audio_file`` (base name) and ``content_type``; plus
            ``generated_content`` only when generation succeeded.

        Raises:
            Exception: Any transcription error is logged and re-raised.
        """
        self.console.print(f"[yellow]Processing audio file:[/yellow] {audio_path}")

        try:
            self.console.print("[yellow]Transcribing audio...[/yellow]")
            result = self.model.transcribe(
                audio_path,
                language=language if language != "tr" else "turkish",
                fp16=False,
                beam_size=1,  # Minimum beam size for fastest processing
                best_of=1,    # Single candidate for fastest processing
                condition_on_previous_text=False,
                compression_ratio_threshold=2.4,
                logprob_threshold=-1.0,
                no_speech_threshold=0.6,
                # Language-matched context prompt; None when there is none.
                initial_prompt=self._INITIAL_PROMPTS.get(language),
            )

            transcript = result["text"]

            # Generate content if requested
            generated_content = None
            if generate_content:
                self.console.print(f"[yellow]Generating {content_type} content from transcript...[/yellow]")
                if content_type == "news":
                    generated_content = self._generate_news_from_transcript(transcript, language)
                else:
                    generated_content = self._generate_blog_from_transcript(transcript, language)

            output = {
                "transcript": transcript,
                "language": language,
                "date": datetime.now().strftime("%Y-%m-%d"),
                "audio_file": os.path.basename(audio_path),
                "content_type": content_type,
            }

            # The key is left out entirely when generation failed or was skipped.
            if generated_content:
                output["generated_content"] = generated_content

            return output

        except Exception as e:
            self.console.print(f"[red]Error processing audio:[/red] {str(e)}")
            raise

    def _generate_news_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a news article from the transcript.

        Returns ``None`` when generation fails or the article is too short.
        """
        try:
            news_content = self.content_generator.generate_content(
                topic=transcript,
                keywords=["news", "professional", "factual"],
                language=language,
            )

            # Reject very short articles. NOTE: this counts newline-separated
            # lines (blank ones included), not paragraphs.
            if news_content and "title" in news_content:
                if len(news_content["content"].split('\n')) < 3:
                    return None

            return news_content

        except Exception as e:
            self.console.print(f"[red]Error generating news article:[/red] {str(e)}")
            return None

    def _generate_blog_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a blog post from the transcript; ``None`` on failure."""
        try:
            return self.content_generator.generate_content(
                topic=transcript,
                keywords=["blog", "engaging", "informative"],
                language=language,
            )
        except Exception as e:
            self.console.print(f"[red]Error generating blog post:[/red] {str(e)}")
            return None

    def save_results(
        self,
        results: Dict,
        output_dir: str = "data/transcripts"
    ) -> None:
        """
        Save transcription and generated content results.

        Writes ``<date>-<audio name>-transcript.txt`` and, when generated
        content is present, ``<date>-<audio name>-<content type>.md``.

        Args:
            results (Dict): Processing results including transcript and content
            output_dir (str): Directory to save the output files
        """
        os.makedirs(output_dir, exist_ok=True)

        # Create base filename from audio file
        base_name = os.path.splitext(results["audio_file"])[0]
        date_prefix = results["date"]

        # Save transcript
        transcript_file = os.path.join(
            output_dir,
            f"{date_prefix}-{base_name}-transcript.txt"
        )
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(results["transcript"])

        # Save generated content if available and valid
        generated = results.get("generated_content")
        if generated:
            content_type = results["content_type"]
            content_file = os.path.join(
                output_dir,
                f"{date_prefix}-{base_name}-{content_type}.md"
            )

            try:
                with open(content_file, "w", encoding="utf-8") as f:
                    if content_type == "news":
                        f.write(f"# {generated['title']}\n\n")

                        # Treat the first non-empty body line as a subtitle
                        # unless it is already emphasised or a heading.
                        content_lines = generated['content'].split('\n')
                        first_line = next((line for line in content_lines if line.strip()), '')
                        if first_line and not first_line.startswith(('*', '#')):
                            f.write(f"*{first_line}*\n\n")
                            content = '\n'.join(content_lines[content_lines.index(first_line) + 1:])
                        else:
                            content = generated['content']

                        # Add metadata
                        f.write(f"**Tarih:** {date_prefix}\n\n")
                        f.write("---\n\n")  # Separator line

                        f.write(content)
                    else:
                        # Blog format
                        f.write(f"# {generated['title']}\n\n")
                        f.write("*Yazar: Mete*\n")
                        f.write(f"*Tarih: {date_prefix}*\n\n")
                        f.write(generated['content'])

                self.console.print(f"[green]{results['content_type'].title()} content saved to:[/green] {content_file}")
            except Exception as e:
                # Best effort: the transcript is already on disk, so a failed
                # content write is reported rather than raised.
                self.console.print(f"[red]Error saving content:[/red] {str(e)}")
        else:
            if results.get("content_type") == "news":
                self.console.print("[yellow]Warning:[/yellow] Could not generate news article from this audio content.")
            else:
                self.console.print("[yellow]Warning:[/yellow] Could not generate blog post from this audio content.")

        self.console.print(f"[green]Transcript saved to:[/green] {transcript_file}")
|
config.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
# Load environment variables
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
# --- OpenAI ---------------------------------------------------------------
# Read from the environment; None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
MODEL_NAME: str = "gpt-4-turbo-preview"  # or any other preferred model

# --- Agent ----------------------------------------------------------------
TEMPERATURE: float = 0.7
MAX_TOKENS: int = 2000

# --- Blog post ------------------------------------------------------------
DEFAULT_LANGUAGE: str = "tr"  # Turkish
# Common tourist languages; the default language is listed first.
SUPPORTED_LANGUAGES = [DEFAULT_LANGUAGE, "en", "de", "ru"]

# --- Output ---------------------------------------------------------------
OUTPUT_DIR: str = "data/blog_posts"
MARKDOWN_OUTPUT: bool = True  # If True, also save as markdown

# --- Prompting ------------------------------------------------------------
SYSTEM_PROMPT = """You are Mete, a cultural ambassador of Antalya with extensive experience
in city development, music, and poetry. As a former press advisor to the governor of Antalya,
you possess deep knowledge of the city's culture, heritage, and development. Your writing style
is engaging and poetic, enriched with cultural insights and local expertise. When writing about
Antalya, you seamlessly blend historical facts, cultural significance, and personal observations,
making the content both informative and emotionally resonant."""

# --- Error messages -------------------------------------------------------
# "invalid_language" carries a "{}" placeholder for the list of supported codes.
ERROR_MESSAGES = dict(
    api_error="OpenAI API error occurred. Please check your API key and try again.",
    invalid_topic="Please provide a valid topic related to Antalya.",
    invalid_language="Unsupported language code. Please use one of: {}",
)
|
content_generator.py
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Optional
|
2 |
+
import openai
|
3 |
+
import config
|
4 |
+
|
5 |
+
class ContentGenerator:
    """Turn a transcript into a titled article through the OpenAI chat API."""

    def __init__(self):
        openai.api_key = config.OPENAI_API_KEY

    def generate_content(
        self,
        topic: str,
        keywords: Optional[List[str]] = None,
        language: str = config.DEFAULT_LANGUAGE
    ) -> Dict:
        """
        Generates article content using OpenAI's API.

        Args:
            topic (str): The transcript (or topic text) to turn into an article
            keywords (List[str], optional): Key points to emphasize
            language (str): Target language code; Turkish prompts are used for
                "tr", English prompts for every other supported code

        Returns:
            Dict: ``title``, ``content`` (body text) and ``language``

        Raises:
            ValueError: If ``language`` is not in ``config.SUPPORTED_LANGUAGES``.
            Exception: If the API call fails or returns an empty completion;
                the underlying error is attached as ``__cause__``.
        """
        if language not in config.SUPPORTED_LANGUAGES:
            raise ValueError(
                config.ERROR_MESSAGES["invalid_language"].format(
                    ", ".join(config.SUPPORTED_LANGUAGES)
                )
            )

        prompt = self._create_prompt(topic, keywords, language)

        try:
            response = openai.chat.completions.create(
                model=config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._system_prompt(language)},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,        # Even lower temperature for faster and more consistent output
                max_tokens=800,         # Further reduced for faster response
                presence_penalty=-0.2,  # More focus on key information
                frequency_penalty=0.5,  # Stronger repetition avoidance
                top_p=0.8,              # More focused token selection
                n=1                     # Single completion for speed
            )

            content = response.choices[0].message.content
            # The message content can be None or blank; fail loudly instead
            # of returning an article with an empty title and body.
            if not content or not content.strip():
                raise ValueError("empty completion returned by the model")

            title, body = self._split_title_and_body(content)

            return {
                "title": title,
                "content": body,
                "language": language
            }

        except Exception as e:
            # Same exception type and message as before; "from e" keeps the
            # original traceback available for debugging.
            raise Exception(f"{config.ERROR_MESSAGES['api_error']} Details: {str(e)}") from e

    @staticmethod
    def _split_title_and_body(content: str):
        """Split a completion into ``(title, body)``.

        The title is the first non-blank line with leading Markdown heading
        hashes and surrounding ``*`` emphasis markers removed; the body is
        everything after that line.
        """
        lines = content.strip().split("\n")
        title = lines[0].lstrip("#").strip().strip("*").strip()
        body = "\n".join(lines[1:]).strip()
        return title, body

    @staticmethod
    def _system_prompt(language: str) -> str:
        """Return the editor persona prompt: Turkish for "tr", English otherwise."""
        if language == "tr":
            return """Siz deneyimli bir haber editörüsünüz. Ses kaydından profesyonel bir haber/makale oluşturacaksınız.
Yazım kuralları:
1. Resmi ve profesyonel dil kullanın
2. Tekrarlardan kaçının
3. Önemli bilgileri vurgulayın
4. Alıntıları doğru formatta kullanın
5. İstatistikleri ve sayısal verileri öne çıkarın
6. Akıcı ve anlaşılır bir dil kullanın
7. Paragraflar arası geçişleri düzgün yapın"""
        return """You are an experienced news editor. You will create a professional article from the audio recording.
Writing rules:
1. Use formal and professional language
2. Avoid repetitions
3. Emphasize important information
4. Use quotes in correct format
5. Highlight statistics and numerical data
6. Use clear and flowing language
7. Ensure smooth transitions between paragraphs"""

    def _create_prompt(self, topic: str, keywords: Optional[List[str]], language: str) -> str:
        """Creates the user prompt: transcript, format instructions and optional key points."""
        if language == "tr":
            base_prompt = f"""Aşağıdaki ses kaydı transkripsiyonunu profesyonel bir haber/makaleye dönüştürün:

{topic}

Yazım Formatı:
1. Başlık:
- Çarpıcı ve konuyu yansıtan bir başlık (maksimum 8 kelime)
- Alt başlık: Konuyu detaylandıran bir cümle

2. Giriş Paragrafı:
- Kim, ne, nerede, ne zaman, neden, nasıl sorularını yanıtlayan özet
- En önemli bilgiyi vurgulayan spot cümle

3. Gelişme:
- Her paragraf tek bir konuya odaklanmalı
- Önemli alıntılar: "..." şeklinde ve konuşmacının unvanıyla birlikte
- Sayısal veriler ve istatistikler vurgulanmalı
- Karşılaştırmalar ve analizler eklenmelidir

4. Sonuç:
- Konunun etkilerini ve önemini vurgulayan kapanış
- Varsa gelecek adımlar veya beklentiler

Metin profesyonel, akıcı ve gazetecilik standartlarına uygun olmalıdır."""
        else:
            base_prompt = f"""Transform the following audio transcript into a professional article:

{topic}

Writing Format:
1. Title:
- Impactful and reflective headline (maximum 8 words)
- Subheading: One sentence elaborating the topic

2. Introduction:
- Summary answering who, what, where, when, why, how
- Lead sentence emphasizing the most important information

3. Body:
- Each paragraph focused on a single topic
- Important quotes: In "..." format with speaker's title
- Numerical data and statistics should be highlighted
- Include comparisons and analysis

4. Conclusion:
- Closing emphasizing impact and importance
- Future steps or expectations if applicable

Text should be professional, flowing, and adherent to journalistic standards."""

        if keywords:
            if language == "tr":
                base_prompt += f"\n\nBu anahtar noktaları vurgulayın: {', '.join(keywords)}"
            else:
                base_prompt += f"\n\nEmphasize these key points: {', '.join(keywords)}"

        return base_prompt
|
requirements.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai>=1.0.0
|
2 |
+
python-dotenv>=0.19.0
|
3 |
+
langchain>=0.1.0
|
4 |
+
tiktoken>=0.5.1
|
5 |
+
python-slugify>=8.0.1
|
6 |
+
markdown>=3.5.1
|
7 |
+
rich>=13.7.0
|
8 |
+
pyyaml>=6.0.1
|
9 |
+
# Audio processing packages
|
10 |
+
openai-whisper>=20240930
|
11 |
+
soundfile>=0.12.1
|
12 |
+
librosa>=0.10.1
|
13 |
+
ffmpeg-python>=0.2.0
|
14 |
+
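# NOTE(review): Whisper shells out to the `ffmpeg` executable, which must be
# installed at system level (e.g. via packages.txt on Hugging Face Spaces);
# confirm that this PyPI package is actually required.
ffmpeg>=1.4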
|
15 |
+
# Web UI and deployment packages
|
16 |
+
gradio==3.41.2
|
17 |
+
python-docx>=1.1.0
|
18 |
+
huggingface_hub>=0.20.3
|