seawolf2357 committed on
Commit
848278a
·
verified ·
1 Parent(s): 6c4b884

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -11,6 +11,8 @@ from googleapiclient.discovery import build
11
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
12
  from youtube_transcript_api.formatters import TextFormatter
13
  from dotenv import load_dotenv
 
 
14
 
15
  # 환경 변수 로드
16
  load_dotenv()
@@ -27,6 +29,7 @@ intents.guild_messages = True
27
 
28
  # 추론 API 클라이언트 설정
29
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
 
30
 
31
  # YouTube API 설정
32
  API_KEY = os.getenv("YOUTUBE_API_KEY")
@@ -77,9 +80,15 @@ class MyClient(discord.Client):
77
  replies = await generate_replies(comments, transcript)
78
  await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
79
  else:
80
- await message.channel.send("자막을 가져올 수 없습니다. 댓글만을 기반으로 답변을 생성합니다.")
81
- replies = await generate_replies(comments, "")
82
- await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
 
 
 
 
 
 
83
  else:
84
  await message.channel.send("댓글을 가져올 수 없습니다.")
85
  else:
@@ -87,8 +96,6 @@ class MyClient(discord.Client):
87
  finally:
88
  self.is_processing = False
89
 
90
-
91
-
92
  def is_message_in_specific_channel(self, message):
93
  return message.channel.id == SPECIFIC_CHANNEL_ID or (
94
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
@@ -113,7 +120,6 @@ def extract_video_id(url):
113
  logging.debug(f'추출된 비디오 ID: {video_id}')
114
  return video_id
115
 
116
-
117
  async def get_best_available_transcript(video_id, max_retries=5, delay=10):
118
  async def fetch_transcript(language):
119
  try:
@@ -157,7 +163,24 @@ async def get_best_available_transcript(video_id, max_retries=5, delay=10):
157
 
158
  return None, None
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
 
 
 
 
161
 
162
  async def get_video_comments(video_id):
163
  comments = []
 
11
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
12
  from youtube_transcript_api.formatters import TextFormatter
13
  from dotenv import load_dotenv
14
+ from pytube import YouTube
15
+ import whisper
16
 
17
  # 환경 변수 로드
18
  load_dotenv()
 
29
 
30
  # 추론 API 클라이언트 설정
31
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
32
+ whisper_client = InferenceClient("openai/whisper-large-v3", token=os.getenv("HF_TOKEN"))
33
 
34
  # YouTube API 설정
35
  API_KEY = os.getenv("YOUTUBE_API_KEY")
 
80
  replies = await generate_replies(comments, transcript)
81
  await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
82
  else:
83
+ await message.channel.send("자막을 가져올 수 없습니다. Whisper 모델을 사용하여 자막을 생성합니다.")
84
+ transcript = await generate_whisper_transcript(video_id)
85
+ if transcript:
86
+ replies = await generate_replies(comments, transcript)
87
+ await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
88
+ else:
89
+ await message.channel.send("Whisper 모델로도 자막을 생성할 수 없습니다. 댓글만을 기반으로 답변을 생성합니다.")
90
+ replies = await generate_replies(comments, "")
91
+ await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
92
  else:
93
  await message.channel.send("댓글을 가져올 수 없습니다.")
94
  else:
 
96
  finally:
97
  self.is_processing = False
98
 
 
 
99
  def is_message_in_specific_channel(self, message):
100
  return message.channel.id == SPECIFIC_CHANNEL_ID or (
101
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
 
120
  logging.debug(f'추출된 비디오 ID: {video_id}')
121
  return video_id
122
 
 
123
  async def get_best_available_transcript(video_id, max_retries=5, delay=10):
124
  async def fetch_transcript(language):
125
  try:
 
163
 
164
  return None, None
165
 
166
def _transcribe_video_audio(video_id):
    """Download a video's audio track and transcribe it (blocking helper).

    Runs synchronously: both the pytube download and the inference request
    block, so this is meant to be called from a worker thread rather than
    directly on the event loop.

    Args:
        video_id: YouTube video ID used to build the watch URL and the
            temporary file name.

    Returns:
        The transcribed text, or None when the video exposes no audio-only
        stream.

    Raises:
        Exception: whatever pytube, file I/O, or the inference client raise;
            the async entry point is responsible for catching and logging.
    """
    yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        # .first() yields None when the filter matches nothing; fail with a
        # clear message instead of an AttributeError on None.download().
        logging.error(f'Whisper 자막 생성 실패: 오디오 스트림을 찾을 수 없습니다 ({video_id})')
        return None

    audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
    try:
        with open(audio_file, "rb") as f:
            # InferenceClient exposes speech-to-text as
            # automatic_speech_recognition; there is no audio_transcription
            # method, so the previous call always raised AttributeError.
            result = whisper_client.automatic_speech_recognition(
                f.read(), model="openai/whisper-large-v3"
            )
    finally:
        # Always remove the temporary download, even when transcription fails,
        # so failed attempts do not accumulate files under temp/.
        try:
            os.remove(audio_file)
        except OSError as e:
            logging.warning(f'임시 파일 삭제 실패: {e}')

    # Depending on the installed huggingface_hub version the call returns
    # either a plain string or an output object carrying a `text` field.
    if isinstance(result, str):
        return result
    text = getattr(result, 'text', None)
    return text if text is not None else result['text']


async def generate_whisper_transcript(video_id):
    """Generate a transcript for a YouTube video with the Whisper model.

    Used as a fallback when no caption track could be fetched. The blocking
    download and inference work is delegated to a worker thread so the
    event loop stays responsive while it runs.

    Args:
        video_id: YouTube video ID.

    Returns:
        The transcribed text, or None if any step fails (the failure is
        logged rather than raised, so callers can fall back further).
    """
    # Local import: the file's top-level import block is not fully visible here.
    import asyncio

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _transcribe_video_audio, video_id)
    except Exception as e:
        logging.error(f'Whisper 자막 생성 실패: {e}')
        return None
184
 
185
  async def get_video_comments(video_id):
186
  comments = []