seawolf2357 committed on
Commit
f53f3e8
·
verified ·
1 Parent(s): 9a1f06d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -13,7 +13,9 @@ from youtube_transcript_api.formatters import TextFormatter
13
  from dotenv import load_dotenv
14
  from pytube import YouTube
15
  import whisper
16
-
 
 
17
  # 환경 변수 로드
18
  load_dotenv()
19
 
@@ -163,6 +165,8 @@ async def get_best_available_transcript(video_id, max_retries=5, delay=10):
163
 
164
  return None, None
165
 
 
 
166
  async def generate_whisper_transcript(video_id):
167
  try:
168
  # YouTube 비디오 다운로드
@@ -170,17 +174,29 @@ async def generate_whisper_transcript(video_id):
170
  audio_stream = yt.streams.filter(only_audio=True).first()
171
  audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
172
 
173
- # Whisper 모델을 사용하여 음성을 텍스트로 변환
174
- with open(audio_file, "rb") as f:
175
- response = whisper_client.audio_transcription(audio=f, model="openai/whisper-large-v3")
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  # 임시 파일 삭제
178
  os.remove(audio_file)
179
 
180
- return response['text']
181
  except Exception as e:
182
  logging.error(f'Whisper 자막 생성 실패: {e}')
183
  return None
 
184
 
185
  async def get_video_comments(video_id):
186
  comments = []
 
13
  from dotenv import load_dotenv
14
  from pytube import YouTube
15
  import whisper
16
+ import torch
17
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
18
+ import librosa
19
  # 환경 변수 로드
20
  load_dotenv()
21
 
 
165
 
166
  return None, None
167
 
168
+
169
+
170
  async def generate_whisper_transcript(video_id):
171
  try:
172
  # YouTube 비디오 다운로드
 
174
  audio_stream = yt.streams.filter(only_audio=True).first()
175
  audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
176
 
177
+ # 오디오 파일 로드
178
+ audio, sr = librosa.load(audio_file, sr=16000)
179
+
180
+ # Whisper 모델 및 프로세서 로드
181
+ device = "cuda" if torch.cuda.is_available() else "cpu"
182
+ processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
183
+ model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3").to(device)
184
+
185
+ # 오디오 처리
186
+ input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)
187
+
188
+ # 생성
189
+ predicted_ids = model.generate(input_features)
190
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
191
 
192
  # 임시 파일 삭제
193
  os.remove(audio_file)
194
 
195
+ return transcription[0]
196
  except Exception as e:
197
  logging.error(f'Whisper 자막 생성 실패: {e}')
198
  return None
199
+
200
 
201
  async def get_video_comments(video_id):
202
  comments = []