yunuseduran committed on
Commit
a9209e8
·
verified ·
1 Parent(s): f4578e4

Upload 6 files

Browse files
Files changed (6) hide show
  1. __init__.py +6 -0
  2. app.py +164 -0
  3. audio_processor.py +201 -0
  4. config.py +36 -0
  5. content_generator.py +147 -0
  6. requirements.txt +18 -0
__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ Antalya Blog Post Generator package.
3
+ A specialized AI agent for generating culturally rich content about Antalya.
4
+ """
5
+
6
+ __version__ = "0.1.0"
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from datetime import datetime
4
+ from docx import Document
5
+ from docx.shared import Pt, RGBColor
6
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
7
+ from audio_processor import AudioProcessor
8
+ import config
9
+
10
class NewsApp:
    """Turn an uploaded audio recording into a downloadable Word document.

    Transcription and text generation are delegated to ``AudioProcessor``;
    this class stores the upload on disk, renders the generated text with
    python-docx and returns status messages for the Gradio UI.
    """

    # ``Document.add_heading`` only accepts levels 0-9.
    _MAX_HEADING_LEVEL = 9

    def __init__(self):
        self.processor = AudioProcessor()

    def process_audio_file(self, audio_file, content_type="news", language="tr"):
        """Process an uploaded audio file and build a .docx from the result.

        Args:
            audio_file: Raw bytes of the upload (the UI uses
                ``gr.File(type="binary")``), or ``None`` when nothing was
                uploaded.
            content_type: ``"news"`` or ``"blog"``; forwarded to the processor.
            language: Language code forwarded to the processor.

        Returns:
            A ``(status_message, docx_path)`` tuple. ``docx_path`` is ``None``
            whenever no document was produced. Errors are reported through the
            message instead of being raised, because the result is shown
            directly in the UI.
        """
        if audio_file is None:
            return "Lütfen bir ses dosyası yükleyin.", None

        temp_audio_path = None
        try:
            # Log the size only: the upload is the raw byte payload, and
            # printing it would dump the whole recording into the logs.
            print(f"Received audio file: {len(audio_file)} bytes")

            temp_audio_path = self._save_upload(audio_file)
            print(f"Saved temporary file to: {temp_audio_path}")

            # Process audio and generate content
            results = self.processor.process_audio(
                audio_path=temp_audio_path,
                language=language,
                content_type=content_type,
                generate_content=True,
            )

            generated = results.get("generated_content")
            if not generated:
                return "İçerik oluşturulamadı. Lütfen ses kaydını kontrol edin.", None

            doc = self._build_document(
                generated["title"], generated["content"], results["date"]
            )

            output_dir = "data/output"
            os.makedirs(output_dir, exist_ok=True)
            # Microseconds keep two requests in the same second from
            # overwriting each other's document.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            doc_path = os.path.join(output_dir, f"haber_{timestamp}.docx")
            doc.save(doc_path)

            return "İçerik başarıyla oluşturuldu!", doc_path

        except Exception as e:
            # UI boundary: surface the failure as a status message.
            return f"Hata oluştu: {str(e)}", None
        finally:
            # The temporary copy is only needed while the processor runs.
            self._remove_quietly(temp_audio_path)

    @staticmethod
    def _save_upload(audio_bytes, temp_dir="temp_audio"):
        """Write the uploaded bytes to a uniquely named file and return its path."""
        import tempfile

        os.makedirs(temp_dir, exist_ok=True)
        # mkstemp guarantees a unique name even for simultaneous uploads.
        # The ".m4a" suffix is kept from the original implementation; the
        # real container format of the upload is not known here.
        fd, path = tempfile.mkstemp(prefix="temp_audio_", suffix=".m4a", dir=temp_dir)
        try:
            with os.fdopen(fd, "wb") as handle:
                handle.write(audio_bytes)
        except Exception:
            NewsApp._remove_quietly(path)
            raise
        return path

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path`` if it is set, ignoring filesystem errors."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            pass

    @staticmethod
    def _split_heading(line):
        """Return ``(level, text)`` for a Markdown ``#`` heading line, else ``None``.

        Only the leading run of ``#`` characters determines the level, so a
        ``#`` inside the heading text does not inflate it. The level is capped
        at the maximum python-docx accepts.
        """
        stripped = line.strip()
        if not stripped.startswith("#"):
            return None
        level = len(stripped) - len(stripped.lstrip("#"))
        level = min(level, NewsApp._MAX_HEADING_LEVEL)
        return level, stripped.strip("#").strip()

    def _build_document(self, title, content, date_text):
        """Render the generated title and Markdown-like body into a Document."""
        doc = Document()

        # Add title
        heading = doc.add_heading(title, 0)
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Add date
        date_paragraph = doc.add_paragraph()
        date_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        date_run = date_paragraph.add_run(f"Tarih: {date_text}")
        date_run.font.size = Pt(10)
        date_run.font.color.rgb = RGBColor(128, 128, 128)

        # Add separator
        doc.add_paragraph("").add_run("_" * 50)

        # Add content: blank lines end a paragraph, '#' lines become
        # headings, lines starting with '*' always open a new paragraph.
        current_paragraph = None
        for line in content.split("\n"):
            if not line.strip():
                current_paragraph = None
                continue

            heading_parts = self._split_heading(line)
            if heading_parts is not None:
                level, text = heading_parts
                doc.add_heading(text, level)
                # Text after a heading must not be appended to the paragraph
                # that precedes the heading.
                current_paragraph = None
                continue

            if current_paragraph is None or line.startswith("*"):
                current_paragraph = doc.add_paragraph()
                current_paragraph.add_run(line)
            else:
                # Continuation line: separate it from the previous run so
                # words at the line break are not glued together.
                current_paragraph.add_run(" " + line.strip())

        return doc
94
+
95
def create_ui():
    """Assemble the Gradio Blocks interface for the audio-to-article app.

    Returns:
        The configured ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    news_app = NewsApp()

    with gr.Blocks(title="Ses Dosyasından Haber Oluşturma", theme=gr.themes.Soft()) as blocks:
        # Usage instructions shown above the form.
        gr.Markdown("""
        # 🎙️ Ses Dosyasından Haber/Blog Oluşturma

        Ses kaydınızı yükleyin, yapay zeka destekli sistemimiz sizin için profesyonel bir haber metni veya blog yazısı oluştursun.

        ### Nasıl Kullanılır:
        1. Ses dosyanızı yükleyin (.mp3, .m4a, .wav formatları desteklenir)
        2. İçerik tipini seçin (Haber/Blog)
        3. Dili seçin
        4. "Oluştur" butonuna tıklayın
        5. Oluşturulan Word belgesini indirin

        ### Önemli Notlar:
        - Desteklenen ses formatları: MP3, M4A, WAV
        - Maksimum dosya boyutu: 25MB
        - İşlem süresi dosya boyutuna göre değişebilir
        - Türkçe ve İngilizce dilleri desteklenmektedir
        """)

        with gr.Row():
            # Left column: the request form.
            with gr.Column():
                upload = gr.File(
                    label="Ses Dosyası",
                    file_types=[".mp3", ".m4a", ".wav"],
                    type="binary",
                )

                kind_choice = gr.Radio(
                    choices=["news", "blog"],
                    value="news",
                    label="İçerik Tipi",
                    info="Oluşturulacak içeriğin türünü seçin",
                )

                language_choice = gr.Radio(
                    choices=["tr", "en"],
                    value="tr",
                    label="Dil",
                    info="İçeriğin dilini seçin",
                )

                generate_button = gr.Button("Oluştur", variant="primary")

            # Right column: status text and the generated document.
            with gr.Column():
                status_box = gr.Textbox(
                    label="Durum",
                    interactive=False,
                )

                result_file = gr.File(
                    label="Oluşturulan Dosya",
                    interactive=False,
                )

        # The handler returns (message, path), matching the two outputs.
        form_inputs = [upload, kind_choice, language_choice]
        form_outputs = [status_box, result_file]
        generate_button.click(
            fn=news_app.process_audio_file,
            inputs=form_inputs,
            outputs=form_outputs,
        )

    return blocks
161
+
162
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    demo = create_ui()
    demo.launch()
audio_processor.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Optional
3
+ from whisper import load_model # Import directly from whisper package
4
+ import librosa
5
+ import soundfile as sf
6
+ from datetime import datetime
7
+ from rich.console import Console
8
+ from rich.progress import Progress
9
+ from content_generator import ContentGenerator
10
+ import config
11
+
12
class AudioProcessor:
    """Transcribe audio with Whisper and turn the transcript into an article.

    The transcript is handed to ``ContentGenerator`` to produce either a news
    article or a blog post; ``save_results`` can persist both to disk.
    """

    # Context hint passed to Whisper as ``initial_prompt``. It has to be in
    # the language being transcribed, so languages without an entry get no
    # prompt at all instead of a Turkish one.
    _INITIAL_PROMPTS = {
        "tr": "Bu bir haber metnidir.",
        "en": "This is a news report.",
    }

    # A generated news body with fewer non-empty lines than this is rejected.
    _MIN_NEWS_PARAGRAPHS = 3

    def __init__(self):
        self.console = Console()
        try:
            # Use tiny model instead of base for faster processing
            self.model = load_model("tiny")
            self.console.print("[green]Successfully loaded Whisper model (tiny)[/green]")
        except Exception as e:
            self.console.print(f"[red]Error loading Whisper model:[/red] {str(e)}")
            raise
        self.content_generator = ContentGenerator()

    def process_audio(
        self,
        audio_path: str,
        language: str = config.DEFAULT_LANGUAGE,
        content_type: str = "news",  # "news" or "blog"
        generate_content: bool = True
    ) -> Dict:
        """
        Process audio file: transcribe and optionally generate content.

        Args:
            audio_path (str): Path to the audio file
            language (str): Language code for transcription and content generation
            content_type (str): Type of content to generate ("news" or "blog");
                any value other than "news" is handled as a blog post
            generate_content (bool): Whether to generate content from transcript

        Returns:
            Dict: Keys "transcript", "language", "date" (YYYY-MM-DD),
            "audio_file" (base name) and "content_type"; "generated_content"
            is present only when generation produced a result.

        Raises:
            Exception: Any transcription error is logged and re-raised.
        """
        self.console.print(f"[yellow]Processing audio file:[/yellow] {audio_path}")

        try:
            # Transcribe audio with settings chosen for speed
            self.console.print("[yellow]Transcribing audio...[/yellow]")
            result = self.model.transcribe(
                audio_path,
                language=language if language != "tr" else "turkish",
                fp16=False,
                beam_size=1,  # Minimum beam size for fastest processing
                best_of=1,  # Single candidate for fastest processing
                condition_on_previous_text=False,
                compression_ratio_threshold=2.4,
                logprob_threshold=-1.0,
                no_speech_threshold=0.6,
                # Language-matched context hint; None when no hint is known.
                initial_prompt=self._INITIAL_PROMPTS.get(language),
            )

            transcript = result["text"]

            # Generate content if requested
            generated_content = None
            if generate_content:
                self.console.print(f"[yellow]Generating {content_type} content from transcript...[/yellow]")
                if content_type == "news":
                    generated_content = self._generate_news_from_transcript(transcript, language)
                else:
                    generated_content = self._generate_blog_from_transcript(transcript, language)

            output = {
                "transcript": transcript,
                "language": language,
                "date": datetime.now().strftime("%Y-%m-%d"),
                "audio_file": os.path.basename(audio_path),
                "content_type": content_type
            }

            if generated_content:
                output["generated_content"] = generated_content

            return output

        except Exception as e:
            self.console.print(f"[red]Error processing audio:[/red] {str(e)}")
            raise

    def _generate_news_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a news article from the transcript.

        Returns the generator's result, or ``None`` when generation fails or
        the article body is too short to be a usable news text.
        """
        try:
            news_content = self.content_generator.generate_content(
                topic=transcript,
                keywords=["news", "professional", "factual"],
                language=language
            )

            # Validate the generated content
            if news_content and "title" in news_content:
                # Count only non-empty lines: blank separator lines are not
                # paragraphs and must not satisfy the minimum.
                paragraphs = [
                    line for line in news_content["content"].split('\n')
                    if line.strip()
                ]
                if len(paragraphs) < self._MIN_NEWS_PARAGRAPHS:
                    return None

            return news_content

        except Exception as e:
            self.console.print(f"[red]Error generating news article:[/red] {str(e)}")
            return None

    def _generate_blog_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a blog post from the transcript; ``None`` on failure."""
        try:
            blog_content = self.content_generator.generate_content(
                topic=transcript,
                keywords=["blog", "engaging", "informative"],
                language=language
            )
            return blog_content
        except Exception as e:
            self.console.print(f"[red]Error generating blog post:[/red] {str(e)}")
            return None

    def save_results(
        self,
        results: Dict,
        output_dir: str = "data/transcripts"
    ) -> None:
        """
        Save transcription and generated content results.

        The transcript is always written as
        ``<date>-<audio base name>-transcript.txt``. Generated content, when
        present, is written as Markdown next to it; a failure while writing
        the content file is logged and does not raise.

        Args:
            results (Dict): Processing results including transcript and content
            output_dir (str): Directory to save the output files
        """
        os.makedirs(output_dir, exist_ok=True)

        # Create base filename from audio file
        base_name = os.path.splitext(results["audio_file"])[0]
        date_prefix = results["date"]

        # Save transcript
        transcript_file = os.path.join(
            output_dir,
            f"{date_prefix}-{base_name}-transcript.txt"
        )
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(results["transcript"])

        # Save generated content if available and valid
        if "generated_content" in results and results["generated_content"]:
            content_type = results["content_type"]
            content_file = os.path.join(
                output_dir,
                f"{date_prefix}-{base_name}-{content_type}.md"
            )

            try:
                with open(content_file, "w", encoding="utf-8") as f:
                    if content_type == "news":
                        # Add metadata and format for news articles
                        f.write(f"# {results['generated_content']['title']}\n\n")

                        # Treat the first non-empty body line as a subtitle
                        # unless it is already emphasised or a heading.
                        content_lines = results['generated_content']['content'].split('\n')
                        first_line = next((line for line in content_lines if line.strip()), '')
                        if first_line and not first_line.startswith('*') and not first_line.startswith('#'):
                            f.write(f"*{first_line}*\n\n")
                            content = '\n'.join(content_lines[content_lines.index(first_line) + 1:])
                        else:
                            content = results['generated_content']['content']

                        # Add metadata
                        f.write(f"**Tarih:** {date_prefix}\n\n")
                        f.write("---\n\n")  # Separator line

                        # Write main content
                        f.write(content)
                    else:
                        # Blog format
                        f.write(f"# {results['generated_content']['title']}\n\n")
                        f.write("*Yazar: Mete*\n")
                        f.write(f"*Tarih: {date_prefix}*\n\n")
                        f.write(results['generated_content']['content'])

                self.console.print(f"[green]{results['content_type'].title()} content saved to:[/green] {content_file}")
            except Exception as e:
                # Best effort: the transcript has already been written.
                self.console.print(f"[red]Error saving content:[/red] {str(e)}")
        else:
            if results.get("content_type") == "news":
                self.console.print("[yellow]Warning:[/yellow] Could not generate news article from this audio content.")
            else:
                self.console.print("[yellow]Warning:[/yellow] Could not generate blog post from this audio content.")

        self.console.print(f"[green]Transcript saved to:[/green] {transcript_file}")
config.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv()
6
+
7
# OpenAI Configuration
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# The chat model can be changed without editing code by setting the
# OPENAI_MODEL_NAME environment variable; when it is unset or empty the
# original default is used.
MODEL_NAME = os.getenv("OPENAI_MODEL_NAME") or "gpt-4-turbo-preview"

# Agent Configuration
# NOTE(review): ContentGenerator.generate_content passes its own literal
# temperature/max_tokens values, so these two do not affect that call.
TEMPERATURE = 0.7
MAX_TOKENS = 2000

# Blog Post Configuration
DEFAULT_LANGUAGE = "tr"  # Turkish
SUPPORTED_LANGUAGES = ["tr", "en", "de", "ru"]  # Common tourist languages

# Output Configuration
OUTPUT_DIR = "data/blog_posts"
MARKDOWN_OUTPUT = True  # If True, also save as markdown

# Prompting Configuration
SYSTEM_PROMPT = """You are Mete, a cultural ambassador of Antalya with extensive experience
in city development, music, and poetry. As a former press advisor to the governor of Antalya,
you possess deep knowledge of the city's culture, heritage, and development. Your writing style
is engaging and poetic, enriched with cultural insights and local expertise. When writing about
Antalya, you seamlessly blend historical facts, cultural significance, and personal observations,
making the content both informative and emotionally resonant."""

# Error messages
# "invalid_language" contains a "{}" placeholder for the supported codes.
ERROR_MESSAGES = {
    "api_error": "OpenAI API error occurred. Please check your API key and try again.",
    "invalid_topic": "Please provide a valid topic related to Antalya.",
    "invalid_language": "Unsupported language code. Please use one of: {}",
}
content_generator.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Optional
2
+ import openai
3
+ import config
4
+
5
class ContentGenerator:
    """Generate an article (title + body) from a transcript via the OpenAI chat API."""

    def __init__(self):
        # Sets the API key on the openai module itself, which the
        # module-level ``openai.chat.completions`` call below relies on.
        openai.api_key = config.OPENAI_API_KEY

    def generate_content(
        self,
        topic: str,
        keywords: Optional[List[str]] = None,
        language: str = config.DEFAULT_LANGUAGE
    ) -> Dict:
        """
        Generates article content using OpenAI's API.

        Args:
            topic (str): The text to turn into an article; it is inserted
                into the prompt as the audio transcript
            keywords (List[str], optional): Key points appended to the prompt
            language (str): Target language code; "tr" selects the Turkish
                prompts, every other supported code the English ones

        Returns:
            Dict: Keys "title", "content" (body) and "language"

        Raises:
            ValueError: If ``language`` is not in ``config.SUPPORTED_LANGUAGES``.
            Exception: If the API call fails or returns no text; the message
                starts with ``config.ERROR_MESSAGES["api_error"]``.
        """
        if language not in config.SUPPORTED_LANGUAGES:
            raise ValueError(
                config.ERROR_MESSAGES["invalid_language"].format(
                    ", ".join(config.SUPPORTED_LANGUAGES)
                )
            )

        prompt = self._create_prompt(topic, keywords, language)
        system_prompt = self._system_prompt(language)

        try:
            response = openai.chat.completions.create(
                model=config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,  # Low temperature for more consistent output
                max_tokens=800,  # Reduced for faster response
                presence_penalty=-0.2,  # More focus on key information
                frequency_penalty=0.5,  # Stronger repetition avoidance
                top_p=0.8,  # More focused token selection
                n=1  # Single completion for speed
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Keep the original exception type and message, but chain the
            # cause so the underlying API error is not lost.
            raise Exception(
                f"{config.ERROR_MESSAGES['api_error']} Details: {str(e)}"
            ) from e

        # The message content can be missing or blank; report that clearly
        # instead of failing while parsing it.
        if not content or not content.strip():
            raise Exception(
                f"{config.ERROR_MESSAGES['api_error']} Details: empty response from model"
            )

        title, body = self._split_title_and_body(content)

        return {
            "title": title,
            "content": body,
            "language": language
        }

    @staticmethod
    def _split_title_and_body(content: str):
        """Split model output into ``(title, body)``.

        The title is the first non-empty line with leading ``#`` markers and
        surrounding ``*`` emphasis removed; the body is everything after it.
        """
        lines = content.split("\n")
        for index, line in enumerate(lines):
            if line.strip():
                title = line.strip().lstrip("#").strip().strip("*").strip()
                body = "\n".join(lines[index + 1:]).strip()
                return title, body
        return "", ""

    @staticmethod
    def _system_prompt(language: str) -> str:
        """Return the system prompt: Turkish for "tr", English otherwise."""
        if language == "tr":
            return """Siz deneyimli bir haber editörüsünüz. Ses kaydından profesyonel bir haber/makale oluşturacaksınız.
Yazım kuralları:
1. Resmi ve profesyonel dil kullanın
2. Tekrarlardan kaçının
3. Önemli bilgileri vurgulayın
4. Alıntıları doğru formatta kullanın
5. İstatistikleri ve sayısal verileri öne çıkarın
6. Akıcı ve anlaşılır bir dil kullanın
7. Paragraflar arası geçişleri düzgün yapın"""
        return """You are an experienced news editor. You will create a professional article from the audio recording.
Writing rules:
1. Use formal and professional language
2. Avoid repetitions
3. Emphasize important information
4. Use quotes in correct format
5. Highlight statistics and numerical data
6. Use clear and flowing language
7. Ensure smooth transitions between paragraphs"""

    def _create_prompt(self, topic: str, keywords: Optional[List[str]], language: str) -> str:
        """Creates the user prompt: transcript, format instructions, optional key points."""
        if language == "tr":
            base_prompt = f"""Aşağıdaki ses kaydı transkripsiyonunu profesyonel bir haber/makaleye dönüştürün:

{topic}

Yazım Formatı:
1. Başlık:
- Çarpıcı ve konuyu yansıtan bir başlık (maksimum 8 kelime)
- Alt başlık: Konuyu detaylandıran bir cümle

2. Giriş Paragrafı:
- Kim, ne, nerede, ne zaman, neden, nasıl sorularını yanıtlayan özet
- En önemli bilgiyi vurgulayan spot cümle

3. Gelişme:
- Her paragraf tek bir konuya odaklanmalı
- Önemli alıntılar: "..." şeklinde ve konuşmacının unvanıyla birlikte
- Sayısal veriler ve istatistikler vurgulanmalı
- Karşılaştırmalar ve analizler eklenmelidir

4. Sonuç:
- Konunun etkilerini ve önemini vurgulayan kapanış
- Varsa gelecek adımlar veya beklentiler

Metin profesyonel, akıcı ve gazetecilik standartlarına uygun olmalıdır."""
        else:
            base_prompt = f"""Transform the following audio transcript into a professional article:

{topic}

Writing Format:
1. Title:
- Impactful and reflective headline (maximum 8 words)
- Subheading: One sentence elaborating the topic

2. Introduction:
- Summary answering who, what, where, when, why, how
- Lead sentence emphasizing the most important information

3. Body:
- Each paragraph focused on a single topic
- Important quotes: In "..." format with speaker's title
- Numerical data and statistics should be highlighted
- Include comparisons and analysis

4. Conclusion:
- Closing emphasizing impact and importance
- Future steps or expectations if applicable

Text should be professional, flowing, and adherent to journalistic standards."""

        if keywords:
            if language == "tr":
                base_prompt += f"\n\nBu anahtar noktaları vurgulayın: {', '.join(keywords)}"
            else:
                base_prompt += f"\n\nEmphasize these key points: {', '.join(keywords)}"

        return base_prompt
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai>=1.0.0
2
+ python-dotenv>=0.19.0
3
+ langchain>=0.1.0
4
+ tiktoken>=0.5.1
5
+ python-slugify>=8.0.1
6
+ markdown>=3.5.1
7
+ rich>=13.7.0
8
+ pyyaml>=6.0.1
9
+ # Audio processing packages
10
+ openai-whisper>=20240930
11
+ soundfile>=0.12.1
12
+ librosa>=0.10.1
13
+ ffmpeg-python>=0.2.0
14
+ ffmpeg>=1.4
15
+ # Web UI and deployment packages
16
+ gradio==3.41.2
17
+ python-docx>=1.1.0
18
+ huggingface_hub>=0.20.3