seawolf2357 committed on
Commit
7524905
·
verified ·
1 Parent(s): b17754f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -53
app.py CHANGED
@@ -5,9 +5,10 @@ import re
5
  import asyncio
6
  import subprocess
7
  import aiohttp
 
8
  from huggingface_hub import InferenceClient
9
  from googleapiclient.discovery import build
10
- from youtube_transcript_api import YouTubeTranscriptApi
11
  from youtube_transcript_api.formatters import TextFormatter
12
  from dotenv import load_dotenv
13
 
@@ -26,7 +27,6 @@ intents.guild_messages = True
26
 
27
  # 추론 API 클라이언트 설정
28
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
29
- #hf_client = InferenceClient("CohereForAI/aya-23-35B", token=os.getenv("HF_TOKEN"))
30
 
31
  # YouTube API 설정
32
  API_KEY = os.getenv("YOUTUBE_API_KEY")
@@ -35,9 +35,6 @@ youtube_service = build('youtube', 'v3', developerKey=API_KEY)
35
  # 특정 채널 ID
36
  SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
37
 
38
- # 웹훅 URL 설정
39
- #WEBHOOK_URL = "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTY1MDYzMjA0MzA1MjY4NTUzMDUxMzUi_pc"
40
-
41
  # 전송 실패 시 재시도 횟수
42
  MAX_RETRIES = 3
43
 
@@ -109,26 +106,34 @@ def extract_video_id(url):
109
  logging.debug(f'์ถ”์ถœ๋œ ๋น„๋””์˜ค ID: {video_id}')
110
  return video_id
111
 
112
- async def get_best_available_transcript(video_id):
113
- try:
114
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
115
- except Exception as e:
116
- logging.warning(f'ํ•œ๊ตญ์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
117
  try:
118
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
119
  except Exception as e:
120
- logging.warning(f'์˜์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
121
  try:
122
- transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
123
- transcript = transcripts.find_manually_created_transcript().fetch()
124
  except Exception as e:
125
- logging.error(f'๋Œ€์ฒด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
126
- return None
127
-
128
- formatter = TextFormatter()
129
- transcript_text = formatter.format_transcript(transcript)
130
- logging.debug(f'๊ฐ€์ ธ์˜จ ์ž๋ง‰: {transcript_text}')
131
- return transcript_text
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  async def get_video_comments(video_id):
134
  comments = []
@@ -150,7 +155,7 @@ async def generate_replies(comments, transcript):
150
  replies = []
151
  for comment, _ in comments:
152
  messages = [
153
- {"role": "system", "content": """๋„ˆ์˜ ์ด๋ฆ„์€ OpenFreeAI์ด๋‹ค. ๋‹ต๊ธ€ ์ƒ์„ฑํ›„ ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— ๋„ˆ์˜ ์ด๋ฆ„์„ ๋ฐํžˆ๊ณ  ๊ณต์†ํ•˜๊ฒŒ ์ธ์‚ฌํ•˜๋ผ. ๋น„๋””์˜ค ์ž๋ง‰: {transcript}"""},
154
  {"role": "user", "content": comment}
155
  ]
156
  loop = asyncio.get_event_loop()
@@ -166,43 +171,13 @@ async def generate_replies(comments, transcript):
166
  logging.debug(f'์ƒ์„ฑ๋œ ๋‹ต๊ธ€: {replies}')
167
  return replies
168
 
169
-
170
- async def send_webhook_data(session, chunk_data, chunk_number):
171
- for attempt in range(MAX_RETRIES):
172
- try:
173
- async with session.post(WEBHOOK_URL, json=chunk_data) as resp:
174
- if resp.status == 200:
175
- logging.info(f"์›นํ›…์œผ๋กœ ๋ฐ์ดํ„ฐ ์ „์†ก ์„ฑ๊ณต: {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
176
- return True # ์„ฑ๊ณต ์‹œ ์ข…๋ฃŒ
177
- else:
178
- logging.error(f"์›นํ›…์œผ๋กœ ๋ฐ์ดํ„ฐ ์ „์†ก ์‹คํŒจ: {resp.status}, {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
179
- except aiohttp.ClientError as e:
180
- logging.error(f"์›นํ›… ์ „์†ก ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}, {chunk_number} ๋ฒˆ์งธ ์‹œ๋„")
181
- await asyncio.sleep(1) # ์žฌ์‹œ๋„ ์ „์— ์ž ์‹œ ๋Œ€๊ธฐ
182
-
183
- return False # ์žฌ์‹œ๋„ ํšŸ์ˆ˜ ์ดˆ๊ณผ ์‹œ ์‹คํŒจ๋กœ ๊ฐ„์ฃผ
184
-
185
  async def create_thread_and_send_replies(message, video_id, comments, replies, session):
186
  thread = await message.channel.create_thread(name=f"{message.author.name}์˜ ๋Œ“๊ธ€ ๋‹ต๊ธ€", message=message)
187
- webhook_data = {"video_id": video_id, "replies": []}
188
 
189
  for (comment, comment_id), reply in zip(comments, replies):
190
  embed = discord.Embed(description=f"**๋Œ“๊ธ€**: {comment}\n**๋‹ต๊ธ€**: {reply}")
191
  await thread.send(embed=embed)
192
 
193
- # 웹훅 데이터 준비 (comment id 포함)
194
- webhook_data["replies"].append({"comment": comment, "reply": reply, "comment_id": comment_id})
195
-
196
- # 데이터를 여러 번 나누어 전송
197
- chunk_size = 1 # ์ „์†กํ•  ๋ฐ์ดํ„ฐ์˜ ๊ฐœ์ˆ˜๋ฅผ 1๋กœ ์„ค์ •ํ•˜์—ฌ ๊ฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ณ„๋„๋กœ ์ „์†ก
198
- for i in range(0, len(webhook_data["replies"]), chunk_size):
199
- chunk = webhook_data["replies"][i:i+chunk_size]
200
- chunk_data = {"video_id": video_id, "replies": chunk}
201
-
202
- success = await send_webhook_data(session, chunk_data, i // chunk_size + 1)
203
- if not success:
204
- logging.error(f"๋ฐ์ดํ„ฐ ์ „์†ก ์‹คํŒจ: {i // chunk_size + 1} ๋ฒˆ์งธ ์ฒญํฌ")
205
-
206
  if __name__ == "__main__":
207
  discord_client = MyClient(intents=intents)
208
- discord_client.run(os.getenv('DISCORD_TOKEN'))
 
5
  import asyncio
6
  import subprocess
7
  import aiohttp
8
+ import time
9
  from huggingface_hub import InferenceClient
10
  from googleapiclient.discovery import build
11
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
12
  from youtube_transcript_api.formatters import TextFormatter
13
  from dotenv import load_dotenv
14
 
 
27
 
28
  # 추론 API 클라이언트 설정
29
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
 
30
 
31
  # YouTube API 설정
32
  API_KEY = os.getenv("YOUTUBE_API_KEY")
 
35
  # 특정 채널 ID
36
  SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
37
 
 
 
 
38
  # 전송 실패 시 재시도 횟수
39
  MAX_RETRIES = 3
40
 
 
106
  logging.debug(f'์ถ”์ถœ๋œ ๋น„๋””์˜ค ID: {video_id}')
107
  return video_id
108
 
109
+ async def get_best_available_transcript(video_id, max_retries=3, delay=5):
110
+ for attempt in range(max_retries):
 
 
 
111
  try:
112
+ transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
113
  except Exception as e:
114
+ logging.warning(f'ํ•œ๊ตญ์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
115
  try:
116
+ transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
 
117
  except Exception as e:
118
+ logging.warning(f'์˜์–ด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
119
+ try:
120
+ transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
121
+ transcript = transcripts.find_manually_created_transcript().fetch()
122
+ except Exception as e:
123
+ if attempt < max_retries - 1:
124
+ logging.error(f'๋Œ€์ฒด ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}')
125
+ await asyncio.sleep(delay)
126
+ continue
127
+ else:
128
+ logging.error(f'๋ชจ๋“  ์ž๋ง‰ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹œ๋„ ์‹คํŒจ: {e}')
129
+ return None
130
+
131
+ formatter = TextFormatter()
132
+ transcript_text = formatter.format_transcript(transcript)
133
+ logging.debug(f'๊ฐ€์ ธ์˜จ ์ž๋ง‰: {transcript_text}')
134
+ return transcript_text
135
+
136
+ return None
137
 
138
  async def get_video_comments(video_id):
139
  comments = []
 
155
  replies = []
156
  for comment, _ in comments:
157
  messages = [
158
+ {"role": "system", "content": f"""๋„ˆ์˜ ์ด๋ฆ„์€ OpenFreeAI์ด๋‹ค. ๋‹ต๊ธ€ ์ƒ์„ฑํ›„ ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— ๋„ˆ์˜ ์ด๋ฆ„์„ ๋ฐํžˆ๊ณ  ๊ณต์†ํ•˜๊ฒŒ ์ธ์‚ฌํ•˜๋ผ. ๋น„๋””์˜ค ์ž๋ง‰: {transcript}"""},
159
  {"role": "user", "content": comment}
160
  ]
161
  loop = asyncio.get_event_loop()
 
171
  logging.debug(f'์ƒ์„ฑ๋œ ๋‹ต๊ธ€: {replies}')
172
  return replies
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  async def create_thread_and_send_replies(message, video_id, comments, replies, session):
175
  thread = await message.channel.create_thread(name=f"{message.author.name}์˜ ๋Œ“๊ธ€ ๋‹ต๊ธ€", message=message)
 
176
 
177
  for (comment, comment_id), reply in zip(comments, replies):
178
  embed = discord.Embed(description=f"**๋Œ“๊ธ€**: {comment}\n**๋‹ต๊ธ€**: {reply}")
179
  await thread.send(embed=embed)
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  if __name__ == "__main__":
182
  discord_client = MyClient(intents=intents)
183
+ discord_client.run(os.getenv('DISCORD_TOKEN'))