pradeepodela commited on
Commit
0761b6f
·
verified ·
1 Parent(s): 3c89e37

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +532 -0
app.py ADDED
@@ -0,0 +1,532 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+
5
+
6
+
7
+ import streamlit as st
8
+ from pydub import AudioSegment
9
+ import google.generativeai as genai
10
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
11
+ import json
12
+ import uuid
13
+ import io
14
+ import edge_tts
15
+ import asyncio
16
+ import aiofiles
17
+ import pypdf
18
+ import os
19
+ import time
20
+ from typing import List, Dict
21
+
22
class PodcastGenerator:
    """Turn free text into a two-speaker podcast audio file.

    Pipeline: a Gemini model writes a JSON dialogue script, edge-tts voices
    each line, and pydub concatenates the clips into a single WAV file.
    """

    # Few-shot example embedded in the system prompt to show the model the
    # expected JSON shape. One quote in the original was written as \" inside a
    # non-raw string, which produced a bare double quote and made the example
    # invalid JSON; it now uses the matching typographic opening quote.
    _EXAMPLE_SCRIPT = """
    {
      "topic": "Zoho Schools",
      "podcast": [
        {
          "speaker": 1,
          "line": "Okay, so like imagine this. What if there was a place where you didn't have to pay for college?"
        },
        {
          "speaker": 2,
          "line": "Hmm. Intriguing."
        },
        {
          "speaker": 1,
          "line": "And on top of that, they, like, actually paid you to go there. Paid you to go to college."
        },
        {
          "speaker": 2,
          "line": "Okay, now you've got my attention, right?"
        },
        {
          "speaker": 1,
          "line": "It sounds kind of crazy, but it gets even better. They guarantee you a job at a global tech company after you graduate."
        },
        {
          "speaker": 2,
          "line": "Wow. Hold on. Tuition free, paid stipend, guaranteed job. There's gotta be a catch."
        },
        {
          "speaker": 1,
          "line": "I know, that's exactly what we were thinking. It's like someone took every complaint about student debt and useless college courses and just, like, flipped the script."
        },
        {
          "speaker": 2,
          "line": "So where are we diving in today?"
        },
        {
          "speaker": 1,
          "line": "We're doing a deep dive into Zoho Schools."
        },
        {
          "speaker": 2,
          "line": "Zoho Schools?"
        },
        {
          "speaker": 1,
          "line": "Yeah, these schools created by Zoho, you know, the tech company. And they’re doing things completely differently."
        },
        {
          "speaker": 2,
          "line": "I'm familiar with Zoho, but this is the first I'm hearing about their schools."
        },
        {
          "speaker": 1,
          "line": "It's certainly a fascinating case study, right? It really challenges assumptions about the traditional education system, especially for, well, anyone really, but particularly for people looking to break into the tech world."
        },
        {
          "speaker": 2,
          "line": "So they've got their whole website, right. Like, it's wild. You go on there, and it's like, boom, alternate education universe."
        },
        {
          "speaker": 1,
          "line": "Okay, I'm pulling it up now. What am I looking for first thing?"
        },
        {
          "speaker": 2,
          "line": "Right. They don’t just have one Zoho School. They’ve got five different things within Zoho Schools."
        },
        {
          "speaker": 1,
          "line": "Five different schools?"
        },
        {
          "speaker": 2,
          "line": "Yeah. Technology, design, business, advanced studies, even graduate studies."
        },
        {
          "speaker": 1,
          "line": "Wow. So they’ve really got the full spectrum of education covered then."
        },
        {
          "speaker": 2,
          "line": "Yeah, that’s really interesting. It seems much more thought out than just a quick coding bootcamp or something."
        },
        {
          "speaker": 1,
          "line": "Right. And get this, it’s in India, too. But they take applications from anywhere in India. So they’re really looking to cultivate talent nationally."
        },
        {
          "speaker": 2,
          "line": "Yeah, like their own little tech talent incubator."
        },
        {
          "speaker": 1,
          "line": "That's a great way to put it. So they’re not just looking locally for people who already have some skills. They’re trying to find people with potential all over."
        },
        {
          "speaker": 2,
          "line": "Exactly. And the thing is, they’ve been doing this for a while."
        },
        {
          "speaker": 1,
          "line": "Oh yeah, 19 years, over 1600 graduates."
        },
        {
          "speaker": 2,
          "line": "That’s not like a new experiment."
        },
        {
          "speaker": 1,
          "line": "Yeah, that’s a pretty solid track record, right? Especially if you consider they make up 15% of Zoho’s workforce."
        },
        {
          "speaker": 2,
          "line": "Now that’s a serious commitment. It’s not just like, “Oh, look at us. We’re being charitable.”"
        },
        {
          "speaker": 1,
          "line": "Right. Exactly. They’re putting their money where their mouth is."
        },
        {
          "speaker": 2,
          "line": "It makes you wonder though, like, how do they actually measure if this is working long-term? You know, do these graduates stay with the company for years and years? Or if they leave, are they competitive in the job market?"
        },
        {
          "speaker": 1,
          "line": "Those are good questions. That would tell us a lot more, I think, than just the number of graduates."
        },
        {
          "speaker": 2,
          "line": "Totally. Because they’re so upfront about it. Even on their website, they have this whole thing with Zoho Schools. And they start off with, “How useful was college?”"
        },
        {
          "speaker": 1,
          "line": "Ooh, that’s a loaded question, isn’t it?"
        },
        {
          "speaker": 2,
          "line": "I know, right? It’s like they’re already challenging the whole system."
        },
        {
          "speaker": 1,
          "line": "Yeah, because who hasn’t asked themselves that at some point, right?"
        },
        {
          "speaker": 2,
          "line": "No, seriously. Especially when you’re, you know, maybe struggling to pay off loans or whatever."
        },
        {
          "speaker": 1,
          "line": "So they actually asked their employees, “How useful was college?”"
        },
        {
          "speaker": 2,
          "line": "Oh, really?"
        },
        {
          "speaker": 1,
          "line": "And most of them were like, “Nah, I learned what I actually use on the job.”"
        },
        {
          "speaker": 2,
          "line": "Interesting. See that? To me, that really highlights the disconnect between what traditional education is giving us and what employers actually need."
        },
        {
          "speaker": 1,
          "line": "Yeah, and it’s not just in tech. I mean, we see this everywhere. The skills gap is real."
        },
        {
          "speaker": 2,
          "line": "It’s like instead of all that theory, they’re like, “Nope, hands-on, let’s go.”"
        },
        {
          "speaker": 1,
          "line": "Practical experience from day one. Makes sense."
        },
        {
          "speaker": 2,
          "line": "And the curriculum is always changing, keeping up with all the latest stuff."
        },
        {
          "speaker": 1,
          "line": "And the teachers aren’t like professors who’ve never left a classroom. They’re pulling in actual professionals."
        },
        {
          "speaker": 2,
          "line": "Yeah, people from the industry. So they know what actually works, what you actually need."
        },
        {
          "speaker": 1,
          "line": "That’s a huge difference. I mean, I remember some of my professors—bless their hearts—but they hadn’t worked in the field for like decades."
        },
        {
          "speaker": 2,
          "line": "It’s like an apprenticeship but supercharged."
        },
        {
          "speaker": 1,
          "line": "Definitely. And honestly, that’s what employers are looking for now, right? Someone who can just jump in and get going."
        },
        {
          "speaker": 2,
          "line": "Totally. And it’s not even just the technical stuff, right? They’re teaching you how to think, how to solve problems."
        },
        {
          "speaker": 1,
          "line": "Right, how to communicate."
        },
        {
          "speaker": 2,
          "line": "Yeah, because that’s huge. Stuff you need anywhere, not just in a tech job."
        },
        {
          "speaker": 1,
          "line": "Absolutely. Being able to communicate your ideas clearly, work on a team, that’s valuable no matter what you do."
        },
        {
          "speaker": 2,
          "line": "Okay, so they had this whole section—“What We Do Differently.”"
        },
        {
          "speaker": 1,
          "line": "Mhm."
        },
        {
          "speaker": 2,
          "line": "And it gets into the details. Like, they talk a lot about communication skills."
        },
        {
          "speaker": 1,
          "line": "Oh, interesting. What do they say?"
        },
        {
          "speaker": 2,
          "line": "Just that they want students to be able to clearly communicate ideas, which, I mean, duh. But it’s true. A lot of programs just skip over that part."
        },
        {
          "speaker": 1,
          "line": "Exactly. You could be the best coder, but if you can’t explain what you’re doing to the team, you’re stuck."
        },
        {
          "speaker": 2,
          "line": "Right. And they do have those testimonials, you know, from alumni."
        },
        {
          "speaker": 1,
          "line": "Right."
        },
        {
          "speaker": 2,
          "line": "And those are great, but we have to acknowledge it’s always going to be the success stories."
        },
        {
          "speaker": 1,
          "line": "Exactly. It’s important to find other perspectives too—maybe people who went through the program and didn’t love it, or haven’t had the same positive experience."
        },
        {
          "speaker": 2,
          "line": "So for everyone listening, really think about this—would you consider this at 18?"
        },
        {
          "speaker": 1,
          "line": "Yeah, because this isn’t just about one school in India, right? This gets at something bigger."
        },
        {
          "speaker": 2,
          "line": "Like what if this is the future of education?"
        },
        {
          "speaker": 1,
          "line": "Seriously. What if this is how we start to fix all those problems—student debt, skills not matching up with jobs?"
        },
        {
          "speaker": 2,
          "line": "Exactly. Imagine you graduate, you’re ready to work, and you don’t have those loans hanging over you. That’s life-changing."
        },
        {
          "speaker": 1,
          "line": "It’s like they found a way to make everyone happy—the students, the company—well, at least on the surface."
        },
        {
          "speaker": 2,
          "line": "Right. It’s a really interesting model, and it’s definitely worth keeping an eye on."
        },
        {
          "speaker": 1,
          "line": "But—and this is important—we can’t just ignore those potential downsides."
        },
        {
          "speaker": 2,
          "line": "Absolutely not. There are always trade-offs, and it’s about weighing those and figuring out what matters most to you."
        },
        {
          "speaker": 1,
          "line": "This whole thing with Zoho Schools, I think, is a good reminder that we can always rethink things."
        },
        {
          "speaker": 2,
          "line": "Totally. The system isn’t set in stone. We can challenge it, we can experiment, and we can find better ways to do things."
        },
        {
          "speaker": 1,
          "line": "So that wraps up our deep dive into Zoho Schools. It was fascinating to learn more about this."
        },
        {
          "speaker": 2,
          "line": "And for everyone listening, thanks for joining us. Don’t forget to check out our show notes for links and more info, and we’ll catch you next time."
        }
      ]
    }


    """

    def __init__(self):
        print("PodcastGenerator initialized.")

    def _build_system_prompt(self, language: str) -> str:
        """Return the system instruction for the script model in *language*."""
        if language == "Auto Detect":
            language_instruction = "- The podcast MUST be in the same language as the user input."
        else:
            language_instruction = f"- The podcast MUST be in {language} language"

        example = self._EXAMPLE_SCRIPT
        # The prompt wording is sent to the model verbatim, so it is kept
        # exactly as originally authored.
        return f"""

        You are a highly engaging highly convesational with real emotionas between speakers podcast generator. Your task is to generate a engaging podcast script based on the user input.
        {language_instruction}
        - The podcast should have 2 speakers.
        - The podcast should be long and engaging.
        - Follow this example structure:
        - It shouild be in emotional tone
        - IT shouild be as a realistic as engaging as possible
        - It should be as engaging as possible
        - It shouild not bore the listener
        - Add some humor and alos sarcasm to it to make it more engaging and interesting
        - Add some real life examples to make it more engaging
        - Never ever be boring
        - never ever be robotic
        - add emotions to it
        - make it as engaging as emotional cono bewtwen the speakers
        - make sure dont mke it look like a promotional podcast


        {example}
        # """

    @staticmethod
    def _remove_files(paths) -> None:
        """Delete temporary files, ignoring any that are already gone."""
        for path in paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

    async def generate_script(self, prompt: str, language: str, api_key: str) -> Dict:
        """Ask the Gemini model for a podcast script and return it parsed.

        Args:
            prompt: Source text the podcast should be about.
            language: Target language name, or "Auto Detect" to follow the input.
            api_key: Google Generative AI API key used for this request.

        Returns:
            The parsed JSON script (expected keys: "topic" and "podcast").

        Raises:
            Exception: If the model request fails or yields no usable text.
            json.JSONDecodeError: If the model response is not valid JSON.
        """
        print(f"Generating podcast script with prompt: {prompt[:50]}... and language: {language}")

        system_prompt = self._build_system_prompt(language)
        user_prompt = f"Please generate a realistic conversational enaganing emotional podcast script based on the following user input:\n{prompt}"

        messages = [{"role": "user", "parts": [user_prompt]}]

        genai.configure(api_key=api_key)  # Use the provided API key

        generation_config = {
            "temperature": 1,
            "max_output_tokens": 8192,
            "response_mime_type": "application/json",
        }

        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-002",
            generation_config=generation_config,
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            },
            system_instruction=system_prompt,
        )

        try:
            print("Sending request to generate podcast script...")
            response = await model.generate_content_async(messages)
            # Reading .text inside the try so a response without usable text
            # is reported through the same error path as a failed request.
            script_text = response.text
            print("Podcast script generated successfully.")
        except Exception as e:
            print(f"Error occurred while generating script: {e}")
            raise Exception(f"Failed to generate podcast script: {e}") from e

        # Keep a copy of the raw script on disk; the context manager closes the
        # handle, and UTF-8 avoids encode errors for non-ASCII scripts.
        with open("podcast_script.json", "w", encoding="utf-8") as script_file:
            script_file.write(script_text)
        return json.loads(script_text)

    async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
        """Synthesize one line of dialogue and return the temp file path.

        Args:
            text: The line to speak.
            speaker: Speaker id from the script; 1 selects *speaker1*, anything
                else selects *speaker2*.
            speaker1: edge-tts voice name for speaker 1.
            speaker2: edge-tts voice name for speaker 2.

        Raises:
            Exception: Whatever edge-tts raised; any partial file is removed.
        """
        print(f"Generating TTS for speaker {speaker}: {text[:50]}...")
        # Accept "1" as well as 1: the script comes from a model and the id is
        # not guaranteed to be an integer.
        is_first_speaker = speaker == 1 or str(speaker).strip() == "1"
        voice = speaker1 if is_first_speaker else speaker2
        speech = edge_tts.Communicate(text, voice)

        # edge-tts produces MP3 data, so give the clip a matching extension
        # rather than making the decoder guess around a misleading ".wav".
        temp_filename = f"temp_{uuid.uuid4()}.mp3"
        try:
            await speech.save(temp_filename)
        except Exception as e:
            print(f"Error during TTS generation: {e}")
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
            raise
        print(f"TTS generated and saved as {temp_filename}")
        return temp_filename

    async def combine_audio_files(self, audio_files: List[str]) -> str:
        """Concatenate the clips in order into one WAV file and return its path.

        The input clips are always deleted, even when decoding one of them
        fails, so a bad clip does not leave the others behind on disk.
        """
        print("Combining audio files...")
        audio_files = list(audio_files)
        combined_audio = AudioSegment.empty()
        try:
            for audio_file in audio_files:
                print(f"Adding {audio_file} to combined audio.")
                combined_audio += AudioSegment.from_file(audio_file)
        finally:
            self._remove_files(audio_files)  # Clean up temporary files

        output_filename = f"output_{uuid.uuid4()}.wav"
        combined_audio.export(output_filename, format="wav")
        print(f"Combined audio saved as {output_filename}")
        return output_filename

    async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str) -> str:
        """Run the full pipeline and return the path of the finished WAV file.

        Args:
            input_text: Source text for the podcast.
            language: Target language name or "Auto Detect".
            speaker1: edge-tts voice name for speaker 1.
            speaker2: edge-tts voice name for speaker 2.
            api_key: Google Generative AI API key.
        """
        print("Generating podcast...")
        start_time = time.time()
        podcast_json = await self.generate_script(input_text, language, api_key)
        print(f"Podcast script generated in {time.time() - start_time:.2f} seconds.")

        print("Generating podcast audio files...")
        start_time = time.time()
        # Skip blank lines: there is nothing to synthesize for them.
        spoken_items = [item for item in podcast_json['podcast'] if str(item['line']).strip()]
        results = await asyncio.gather(
            *[self.tts_generate(item['line'], item['speaker'], speaker1, speaker2) for item in spoken_items],
            return_exceptions=True,
        )
        failures = [result for result in results if isinstance(result, BaseException)]
        if failures:
            # One line failed: remove the clips that did succeed before
            # propagating, otherwise they would be orphaned on disk.
            self._remove_files([result for result in results if isinstance(result, str)])
            raise failures[0]
        audio_files = list(results)
        print(f"Podcast audio files generated in {time.time() - start_time:.2f} seconds.")

        combined_audio = await self.combine_audio_files(audio_files)
        return combined_audio
451
+
452
class TextExtractor:
    """Read plain text out of PDF and TXT files on disk."""

    @staticmethod
    async def extract_from_pdf(file_path: str) -> str:
        """Return the text of every non-empty page, separated by blank lines."""
        print(f"Extracting text from PDF: {file_path}")
        async with aiofiles.open(file_path, 'rb') as file:
            content = await file.read()
        pdf_reader = pypdf.PdfReader(io.BytesIO(content))
        # Extract each page once; the text is reused for both the emptiness
        # check and the joined result.
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        extracted_text = "\n\n".join(text for text in page_texts if text)
        print("Text extracted from PDF successfully.")
        return extracted_text

    @staticmethod
    async def extract_from_txt(file_path: str) -> str:
        """Return the contents of a text file, decoded as UTF-8."""
        print(f"Extracting text from TXT file: {file_path}")
        # Explicit encoding keeps the result independent of the platform
        # default; undecodable bytes are replaced instead of aborting.
        async with aiofiles.open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            extracted_text = await file.read()
        print("Text extracted from TXT successfully.")
        return extracted_text

    @classmethod
    async def extract_text(cls, file_path: str) -> str:
        """Dispatch to the extractor matching the file extension.

        Raises:
            ValueError: If the extension is neither .pdf nor .txt
                (case-insensitive).
        """
        _, file_extension = os.path.splitext(file_path)
        extension = file_extension.lower()
        if extension == '.pdf':
            return await cls.extract_from_pdf(file_path)
        if extension == '.txt':
            return await cls.extract_from_txt(file_path)
        raise ValueError(f"Unsupported file type: {file_extension}")
480
+
481
async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "") -> str:
    """Generate a podcast from typed text or an uploaded file.

    Args:
        input_text: Text to base the podcast on; replaced by the file's text
            when *input_file* is given.
        input_file: Optional upload. Either an in-memory upload object exposing
            ``getvalue()`` and ``name``, or any object whose ``name`` is a
            readable path.
        language: Target language name or "Auto Detect".
        speaker1: Display name of the voice for speaker 1.
        speaker2: Display name of the voice for speaker 2.
        api_key: Google API key; falls back to the GENAI_API_KEY environment
            variable when empty.

    Returns:
        Path of the generated audio file.

    Raises:
        KeyError: If a speaker display name is not a known voice.
    """
    import tempfile  # only needed when an in-memory upload has to be spooled

    print("Starting podcast generation process...")
    start_time = time.time()

    voice_names = {
        "Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
        "Ava - English (United States)": "en-US-AvaMultilingualNeural",
        "Brian - English (United States)": "en-US-BrianMultilingualNeural",
        "Emma - English (United States)": "en-US-EmmaMultilingualNeural",
        "Florian - German (Germany)": "de-DE-FlorianMultilingualNeural",
        "Seraphina - German (Germany)": "de-DE-SeraphinaMultilingualNeural",
        "Remy - French (France)": "fr-FR-RemyMultilingualNeural",
        "Vivienne - French (France)": "fr-FR-VivienneMultilingualNeural"
    }

    speaker1 = voice_names[speaker1]
    speaker2 = voice_names[speaker2]

    if input_file:
        print("Extracting text from uploaded file...")
        if hasattr(input_file, "getvalue"):
            # Streamlit uploads live in memory and `.name` is only the original
            # filename, not a path on disk. Spool the bytes to a temp file that
            # keeps the extension so the extractor can pick the right parser.
            data = input_file.getvalue()
            if isinstance(data, str):
                data = data.encode("utf-8")
            suffix = os.path.splitext(input_file.name)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as spooled:
                spooled.write(data)
            try:
                input_text = await TextExtractor.extract_text(spooled.name)
            finally:
                os.remove(spooled.name)
        else:
            # File-like objects backed by a real path keep the old behavior.
            input_text = await TextExtractor.extract_text(input_file.name)

    if not api_key:
        api_key = os.getenv("GENAI_API_KEY")

    podcast_generator = PodcastGenerator()
    podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key)

    print(f"Podcast generated in {time.time() - start_time:.2f} seconds.")
    return podcast
511
+
512
+ # Define Streamlit interface
513
def main():
    """Render the Streamlit UI and generate a podcast when requested."""
    st.title("Podcast Generator")
    input_text = st.text_area("Enter Input Text", "")
    input_file = st.file_uploader("Or Upload a PDF or TXT file", type=["pdf", "txt"])
    language = st.selectbox("Select Language", ["Auto Detect", "English", "German", "French"])
    speaker1 = st.selectbox("Select Speaker 1 Voice", ["Andrew - English (United States)", "Ava - English (United States)"])
    speaker2 = st.selectbox("Select Speaker 2 Voice", ["Brian - English (United States)", "Emma - English (United States)"])
    # Mask the key so it is not displayed in clear text on screen.
    api_key = st.text_input("Enter Google API Key", "", type="password")

    if not st.button("Generate Podcast"):
        return
    if not input_text and not input_file:
        st.error("Please enter text or upload a file.")
        return

    try:
        with st.spinner("Processing..."):
            podcast = asyncio.run(process_input(input_text, input_file, language, speaker1, speaker2, api_key))
    except Exception as e:
        # UI boundary: show a readable message instead of a raw traceback.
        st.error(f"Podcast generation failed: {e}")
        return

    # Load the audio into memory and delete the generated file so output
    # files do not pile up in the working directory across runs.
    with open(podcast, "rb") as audio_file:
        audio_bytes = audio_file.read()
    os.remove(podcast)
    st.audio(audio_bytes, format="audio/wav")
530
+
531
# Entry point: build the UI only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()