Spaces: Sleeping
Adarsh Shirawalmath committed
Commit d584030
1 Parent(s): f7695fc
Upload
Files changed: app/main.py (+22 -44)

app/main.py CHANGED
@@ -3,24 +3,22 @@ from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, HttpUrl
 import os
 import logging
-import pytubefix
-from openai import OpenAI
-from deepgram import Deepgram
-import asyncio
 import json
 import io
-import google.generativeai as genai
 import time
 from collections import deque
 import yt_dlp
+from openai import OpenAI
+from deepgram import Deepgram
+import asyncio
+import google.generativeai as genai
+from pytubefix import YouTube
 
-# Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
-# API keys
 DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
 deepgram = Deepgram(DEEPGRAM_API_KEY)
 
@@ -30,12 +28,6 @@ openai_client = OpenAI(api_key=OPENAI_API_KEY)
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 genai.configure(api_key=GOOGLE_API_KEY)
 
-# Rate limit variables
-RATE_LIMIT = 15
-RATE_WINDOW = 60
-request_timestamps = deque()
-
-# Models for API requests
 class VideoURL(BaseModel):
     url: HttpUrl
     summary_length: str
@@ -48,14 +40,16 @@ class ChatMessage(BaseModel):
 class VideoRequest(BaseModel):
     url: str
 
-
+RATE_LIMIT = 15
+RATE_WINDOW = 60
+request_timestamps = deque()
+
 def is_rate_limited():
     current_time = time.time()
     while request_timestamps and current_time - request_timestamps[0] > RATE_WINDOW:
         request_timestamps.popleft()
     return len(request_timestamps) >= RATE_LIMIT
 
-# Audio transcription function
 async def transcribe_audio(audio_file):
     try:
         with open(audio_file, 'rb') as audio:
@@ -67,7 +61,6 @@ async def transcribe_audio(audio_file):
         logger.error(f"Error transcribing audio: {str(e)}")
         return None
 
-# Summary generation function
 def generate_summary(text, video_description, summary_length):
     if summary_length == "100 words - bullet points":
         prompt = f"Summarize the following podcast in about 100 words using bullet points. Focus only on the main content and key points discussed in the podcast. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text}"
@@ -96,7 +89,6 @@ def generate_summary(text, video_description, summary_length):
     )
     return response.choices[0].message.content.strip()
 
-# Quiz generation function
def generate_quiz(text, video_description):
     prompt = f"Create a quiz with 10 multiple-choice questions based on the following podcast. Each question should have 4 options (A, B, C, D) with only one correct answer. Focus on the main content and key points discussed in the podcast. Format the output as a JSON array of objects, where each object represents a question with 'question', 'choices', and 'correct_answer' keys. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text[:4000]}"
 
@@ -116,33 +108,23 @@ def generate_quiz(text, video_description):
         logger.error(f"Raw response: {response.choices[0].message.content}")
         return []
 
-# Patch to mimic a real browser by modifying headers
-import pytubefix.request as request_module
-
-# Backup the original request function
-_original_execute_request = request_module._execute_request
-
-def _patched_execute_request(url, method="GET", headers=None, data=None):
-    # Default headers if not provided
-    if headers is None:
-        headers = {}
-
-    # Add a User-Agent header to mimic a real browser
-    headers["User-Agent"] = (
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-        "Chrome/91.0.4472.124 Safari/537.36"
-    )
-    # Call the original function with the modified headers
-    return _original_execute_request(url, method=method, headers=headers, data=data)
-
-# Patch pytube's request function
-request_module._execute_request = _patched_execute_request
-
-# Route to handle transcription requests
 @app.post("/transcribe")
 async def transcribe_video(request: VideoRequest):
     url = request.url
     try:
+        # Prepare the token file required by pytubefix
+        data = {
+            "access_token": os.getenv("ACCESS_TOKEN"),
+            "refresh_token": os.getenv("REFRESH_TOKEN"),
+            "expires": 1823266077  # future timestamp to prevent token refresh issues
+        }
+        token_file_path = "/tmp/token.json"
+        with open(token_file_path, "w") as f:
+            json.dump(data, f)
+
+        # Instantiate YouTube with OAuth credentials
+        yt = YouTube(url, use_oauth=True, allow_oauth_cache=True, token_file=token_file_path)
+
         # Download the audio from the YouTube video
         ydl_opts = {
             'format': 'bestaudio/best',
@@ -170,7 +152,6 @@ async def transcribe_video(request: VideoRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
-# Route to generate audio summary
 @app.post("/generate_audio_summary")
 async def generate_audio_summary(summary: str):
     response = openai_client.audio.speech.create(
@@ -183,7 +164,6 @@ async def generate_audio_summary(summary: str):
 
     return StreamingResponse(audio_data, media_type="audio/mp3")
 
-# Route to handle chat with personality
 @app.post("/chat")
 async def chat_with_personality(chat_message: ChatMessage):
     prompt = f"You are roleplaying as {chat_message.speaker}, a podcast guest. Respond to the user's message in character, based on the content of the podcast. Here's the full transcript for context: {chat_message.transcript[:2000]}"
@@ -197,5 +177,3 @@ async def chat_with_personality(chat_message: ChatMessage):
     )
 
     return {"response": response.choices[0].message.content.strip()}
-
-#hello
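Note: the commit moves the rate-limit state (RATE_LIMIT, RATE_WINDOW, request_timestamps) next to is_rate_limited(), but none of the hunks shown actually call the helper. The sketch below shows one way such a sliding-window limiter is typically wired into a route; the /limited_endpoint route, the record_request() helper, and the 429 response are illustrative assumptions, not code from this diff.

# Hypothetical usage sketch for the sliding-window limiter moved in this commit.
# The route name, record_request(), and the 429 handling are assumptions.
import time
from collections import deque

from fastapi import FastAPI, HTTPException

RATE_LIMIT = 15       # max requests per window (value from the diff)
RATE_WINDOW = 60      # window length in seconds (value from the diff)
request_timestamps = deque()

app = FastAPI()


def is_rate_limited() -> bool:
    # Drop timestamps that have aged out of the window, then check the count.
    current_time = time.time()
    while request_timestamps and current_time - request_timestamps[0] > RATE_WINDOW:
        request_timestamps.popleft()
    return len(request_timestamps) >= RATE_LIMIT


def record_request() -> None:
    # Hypothetical helper: remember when each accepted request arrived.
    request_timestamps.append(time.time())


@app.post("/limited_endpoint")
async def limited_endpoint():
    if is_rate_limited():
        # Reject the call once the window is full.
        raise HTTPException(status_code=429, detail="Rate limit exceeded, try again later.")
    record_request()
    return {"status": "accepted"}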
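The transcribe_audio hunks stop right after the audio file is opened, so the Deepgram call itself is not visible in this diff. Assuming the v2 Python SDK implied by `from deepgram import Deepgram`, a prerecorded transcription usually follows the pattern sketched below; the mimetype, the 'punctuate' option, and the response indexing are assumptions, not code from app/main.py.

# Minimal sketch of a Deepgram v2 prerecorded transcription, assuming the SDK
# that exposes Deepgram(...).transcription.prerecorded; options are assumptions.
import logging
import os

from deepgram import Deepgram

logger = logging.getLogger(__name__)
deepgram = Deepgram(os.getenv("DEEPGRAM_API_KEY"))


async def transcribe_audio(audio_file):
    try:
        with open(audio_file, 'rb') as audio:
            source = {'buffer': audio, 'mimetype': 'audio/mp3'}  # assumed mimetype
            response = await deepgram.transcription.prerecorded(source, {'punctuate': True})
        # Typical location of the transcript in a prerecorded response.
        return response['results']['channels'][0]['alternatives'][0]['transcript']
    except Exception as e:
        logger.error(f"Error transcribing audio: {str(e)}")
        return None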
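Similarly, the /transcribe hunk truncates the yt_dlp options right after 'format': 'bestaudio/best', so the actual download settings are not part of this diff. A common way to fetch best-quality audio with yt_dlp is sketched below; the output template, the ffmpeg postprocessor, and the download_audio() helper are assumptions rather than the options used in app/main.py.

# Hypothetical yt_dlp download sketch; only 'format': 'bestaudio/best' comes from the diff.
import yt_dlp


def download_audio(url: str, out_dir: str = "/tmp") -> str:
    ydl_opts = {
        'format': 'bestaudio/best',              # option visible in the diff
        'outtmpl': f'{out_dir}/%(id)s.%(ext)s',  # assumed output template
        'postprocessors': [{                     # assumed: convert to mp3 via ffmpeg
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename() gives the pre-postprocessing name, so swap in the mp3 extension.
        base = ydl.prepare_filename(info)
        return base.rsplit('.', 1)[0] + '.mp3'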