Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,10 @@ import re
|
|
5 |
import asyncio
|
6 |
import subprocess
|
7 |
import aiohttp
|
|
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from googleapiclient.discovery import build
|
10 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
11 |
from youtube_transcript_api.formatters import TextFormatter
|
12 |
from dotenv import load_dotenv
|
13 |
|
@@ -26,7 +27,6 @@ intents.guild_messages = True
|
|
26 |
|
27 |
# 추론 API 클라이언트 설정
|
28 |
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
|
29 |
-
#hf_client = InferenceClient("CohereForAI/aya-23-35B", token=os.getenv("HF_TOKEN"))
|
30 |
|
31 |
# YouTube API 설정
|
32 |
API_KEY = os.getenv("YOUTUBE_API_KEY")
|
@@ -35,9 +35,6 @@ youtube_service = build('youtube', 'v3', developerKey=API_KEY)
|
|
35 |
# 특정 채널 ID
|
36 |
SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
|
37 |
|
38 |
-
# 웹훅 URL 설정
|
39 |
-
#WEBHOOK_URL = "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTY1MDYzMjA0MzA1MjY4NTUzMDUxMzUi_pc"
|
40 |
-
|
41 |
# 전송 실패 시 재시도 횟수
|
42 |
MAX_RETRIES = 3
|
43 |
|
@@ -109,26 +106,34 @@ def extract_video_id(url):
|
|
109 |
logging.debug(f'์ถ์ถ๋ ๋น๋์ค ID: {video_id}')
|
110 |
return video_id
|
111 |
|
112 |
-
async def get_best_available_transcript(video_id):
|
113 |
-
|
114 |
-
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
|
115 |
-
except Exception as e:
|
116 |
-
logging.warning(f'ํ๊ตญ์ด ์๋ง ๊ฐ์ ธ์ค๊ธฐ ์ค๋ฅ: {e}')
|
117 |
try:
|
118 |
-
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['
|
119 |
except Exception as e:
|
120 |
-
logging.warning(f'
|
121 |
try:
|
122 |
-
|
123 |
-
transcript = transcripts.find_manually_created_transcript().fetch()
|
124 |
except Exception as e:
|
125 |
-
logging.
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
async def get_video_comments(video_id):
|
134 |
comments = []
|
@@ -150,7 +155,7 @@ async def generate_replies(comments, transcript):
|
|
150 |
replies = []
|
151 |
for comment, _ in comments:
|
152 |
messages = [
|
153 |
-
{"role": "system", "content": """๋์ ์ด๋ฆ์ OpenFreeAI์ด๋ค. ๋ต๊ธ ์์ฑํ ๊ฐ์ฅ ๋ง์ง๋ง์ ๋์ ์ด๋ฆ์ ๋ฐํ๊ณ ๊ณต์ํ๊ฒ ์ธ์ฌํ๋ผ. ๋น๋์ค ์๋ง: {transcript}"""},
|
154 |
{"role": "user", "content": comment}
|
155 |
]
|
156 |
loop = asyncio.get_event_loop()
|
@@ -166,43 +171,13 @@ async def generate_replies(comments, transcript):
|
|
166 |
logging.debug(f'์์ฑ๋ ๋ต๊ธ: {replies}')
|
167 |
return replies
|
168 |
|
169 |
-
|
170 |
-
async def send_webhook_data(session, chunk_data, chunk_number):
|
171 |
-
for attempt in range(MAX_RETRIES):
|
172 |
-
try:
|
173 |
-
async with session.post(WEBHOOK_URL, json=chunk_data) as resp:
|
174 |
-
if resp.status == 200:
|
175 |
-
logging.info(f"์นํ
์ผ๋ก ๋ฐ์ดํฐ ์ ์ก ์ฑ๊ณต: {chunk_number} ๋ฒ์งธ ์๋")
|
176 |
-
return True # ์ฑ๊ณต ์ ์ข
๋ฃ
|
177 |
-
else:
|
178 |
-
logging.error(f"์นํ
์ผ๋ก ๋ฐ์ดํฐ ์ ์ก ์คํจ: {resp.status}, {chunk_number} ๋ฒ์งธ ์๋")
|
179 |
-
except aiohttp.ClientError as e:
|
180 |
-
logging.error(f"์นํ
์ ์ก ์ค ์ค๋ฅ ๋ฐ์: {e}, {chunk_number} ๋ฒ์งธ ์๋")
|
181 |
-
await asyncio.sleep(1) # ์ฌ์๋ ์ ์ ์ ์ ๋๊ธฐ
|
182 |
-
|
183 |
-
return False # ์ฌ์๋ ํ์ ์ด๊ณผ ์ ์คํจ๋ก ๊ฐ์ฃผ
|
184 |
-
|
185 |
async def create_thread_and_send_replies(message, video_id, comments, replies, session):
|
186 |
thread = await message.channel.create_thread(name=f"{message.author.name}์ ๋๊ธ ๋ต๊ธ", message=message)
|
187 |
-
webhook_data = {"video_id": video_id, "replies": []}
|
188 |
|
189 |
for (comment, comment_id), reply in zip(comments, replies):
|
190 |
embed = discord.Embed(description=f"**๋๊ธ**: {comment}\n**๋ต๊ธ**: {reply}")
|
191 |
await thread.send(embed=embed)
|
192 |
|
193 |
-
# ์นํ
๋ฐ์ดํฐ ์ค๋น (comment id ํฌํจ)
|
194 |
-
webhook_data["replies"].append({"comment": comment, "reply": reply, "comment_id": comment_id})
|
195 |
-
|
196 |
-
# ๋ฐ์ดํฐ๋ฅผ ์ฌ๋ฌ ๋ฒ ๋๋์ด ์ ์ก
|
197 |
-
chunk_size = 1 # ์ ์กํ ๋ฐ์ดํฐ์ ๊ฐ์๋ฅผ 1๋ก ์ค์ ํ์ฌ ๊ฐ ๋ฐ์ดํฐ๋ฅผ ๋ณ๋๋ก ์ ์ก
|
198 |
-
for i in range(0, len(webhook_data["replies"]), chunk_size):
|
199 |
-
chunk = webhook_data["replies"][i:i+chunk_size]
|
200 |
-
chunk_data = {"video_id": video_id, "replies": chunk}
|
201 |
-
|
202 |
-
success = await send_webhook_data(session, chunk_data, i // chunk_size + 1)
|
203 |
-
if not success:
|
204 |
-
logging.error(f"๋ฐ์ดํฐ ์ ์ก ์คํจ: {i // chunk_size + 1} ๋ฒ์งธ ์ฒญํฌ")
|
205 |
-
|
206 |
if __name__ == "__main__":
|
207 |
discord_client = MyClient(intents=intents)
|
208 |
-
discord_client.run(os.getenv('DISCORD_TOKEN'))
|
|
|
5 |
import asyncio
|
6 |
import subprocess
|
7 |
import aiohttp
|
8 |
+
import time
|
9 |
from huggingface_hub import InferenceClient
|
10 |
from googleapiclient.discovery import build
|
11 |
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
12 |
from youtube_transcript_api.formatters import TextFormatter
|
13 |
from dotenv import load_dotenv
|
14 |
|
|
|
27 |
|
28 |
# 추론 API 클라이언트 설정
|
29 |
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
|
|
|
30 |
|
31 |
# YouTube API 설정
|
32 |
API_KEY = os.getenv("YOUTUBE_API_KEY")
|
|
|
35 |
# 특정 채널 ID
|
36 |
SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
|
37 |
|
|
|
|
|
|
|
38 |
# 전송 실패 시 재시도 횟수
|
39 |
MAX_RETRIES = 3
|
40 |
|
|
|
106 |
logging.debug(f'์ถ์ถ๋ ๋น๋์ค ID: {video_id}')
|
107 |
return video_id
|
108 |
|
109 |
+
def _fetch_transcript_with_fallbacks(video_id):
    """Blocking helper: return raw transcript entries for *video_id*.

    Tries Korean first, then English, then any transcript YouTube lists for
    the video (manually created ones preferred over auto-generated ones).
    Raises whatever the last fallback raises when nothing can be fetched.
    """
    for label, language in (('한국어', 'ko'), ('영어', 'en')):
        try:
            return YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
        except Exception as e:
            logging.warning(f'{label} 자막 가져오기 오류: {e}')

    # find_manually_created_transcript() requires a list of language codes, so
    # calling it with no argument always raised TypeError. Pick from the listed
    # transcripts directly instead; sorting on is_generated puts manually
    # created transcripts (False) ahead of auto-generated ones (True).
    available = sorted(
        YouTubeTranscriptApi.list_transcripts(video_id),
        key=lambda t: t.is_generated,
    )
    if not available:
        raise LookupError(f'no transcripts listed for video {video_id}')
    return available[0].fetch()


async def get_best_available_transcript(video_id, max_retries=3, delay=5):
    """Fetch the best available transcript of a YouTube video as plain text.

    Args:
        video_id: YouTube video ID.
        max_retries: how many times the whole fallback chain is attempted.
        delay: seconds to wait between attempts.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when every
        attempt failed (or ``max_retries`` is not positive).
    """
    loop = asyncio.get_running_loop()

    for attempt in range(max_retries):
        try:
            # The transcript API does synchronous HTTP requests; run it in the
            # default executor so the event loop is not blocked meanwhile.
            transcript = await loop.run_in_executor(
                None, _fetch_transcript_with_fallbacks, video_id
            )
        except Exception as e:
            if attempt < max_retries - 1:
                logging.error(f'대체 자막 가져오기 오류: {e}')
                await asyncio.sleep(delay)
                continue
            logging.error(f'모든 자막 가져오기 시도 실패: {e}')
            return None

        transcript_text = TextFormatter().format_transcript(transcript)
        logging.debug(f'가져온 자막: {transcript_text}')
        return transcript_text

    return None
|
137 |
|
138 |
async def get_video_comments(video_id):
|
139 |
comments = []
|
|
|
155 |
replies = []
|
156 |
for comment, _ in comments:
|
157 |
messages = [
|
158 |
+
{"role": "system", "content": f"""๋์ ์ด๋ฆ์ OpenFreeAI์ด๋ค. ๋ต๊ธ ์์ฑํ ๊ฐ์ฅ ๋ง์ง๋ง์ ๋์ ์ด๋ฆ์ ๋ฐํ๊ณ ๊ณต์ํ๊ฒ ์ธ์ฌํ๋ผ. ๋น๋์ค ์๋ง: {transcript}"""},
|
159 |
{"role": "user", "content": comment}
|
160 |
]
|
161 |
loop = asyncio.get_event_loop()
|
|
|
171 |
logging.debug(f'์์ฑ๋ ๋ต๊ธ: {replies}')
|
172 |
return replies
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
async def create_thread_and_send_replies(message, video_id, comments, replies, session):
    """Open a thread on *message* and post one embed per comment/reply pair.

    Args:
        message: the Discord message the thread is attached to.
        video_id: unused here; kept so existing callers keep working.
        comments: iterable of ``(comment_text, comment_id)`` pairs.
        replies: generated reply texts, paired with ``comments`` by position.
        session: unused here; kept so existing callers keep working.
    """
    # Discord rejects embeds whose description exceeds 4096 characters; one
    # rejected send would abort the loop and drop every remaining reply.
    max_description_length = 4096

    thread = await message.channel.create_thread(
        name=f"{message.author.name}의 댓글 답글",
        message=message,
    )

    for (comment, _comment_id), reply in zip(comments, replies):
        description = f"**댓글**: {comment}\n**답글**: {reply}"
        if len(description) > max_description_length:
            description = description[:max_description_length - 3] + "..."
        await thread.send(embed=discord.Embed(description=description))
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
if __name__ == "__main__":
    # Script entry point: build the bot client with the module-level intents,
    # then start it with the token read from the DISCORD_TOKEN env variable.
    discord_client = MyClient(intents=intents)
    discord_client.run(
        os.getenv('DISCORD_TOKEN')
    )
|