A-yum1 committed on
Commit
0dcfc65
·
2 Parent(s): 08e8b96 042eae0

Merge branch 'masui/transcription'

Browse files
Files changed (1) hide show
  1. transcription.py +26 -26
transcription.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from faster_whisper import WhisperModel
3
 
4
  class TranscriptionMaker():
5
- #書き起こしファイル(ファイル名_transcription.txt)を吐き出すディレクトリを指定
6
  def __init__(self,output_dir=os.path.abspath("/tmp/data/transcriptions")):
7
  self.model = WhisperModel("base", device="cpu")
8
  self.output_dir = output_dir
@@ -13,35 +13,35 @@ class TranscriptionMaker():
13
  print(f"Error creating directory {self.output_dir}: {e}")
14
  raise
15
 
16
- #音声ファイルのパスを受け取り、書き起こしファイルを作成する
17
- def create_transcription(self,audio_path):
18
- try:
19
- if not os.path.isfile(audio_path):
20
- raise FileNotFoundError(f"The specified audio file does not exist: {audio_path}")
21
-
22
- segments, info = self.model.transcribe(audio_path)
23
- results = []
24
-
 
 
 
 
 
 
 
25
  for segment in segments:
26
  results.append({
27
  "start": segment.start,
28
  "end": segment.end,
29
  "text": segment.text
30
  })
31
-
32
- #ファイルの書き込み
33
- output_file=os.path.join(self.output_dir,os.path.basename(audio_path)+"_transcription.txt")
34
- try:
35
- with open(output_file,"w",encoding="utf-8") as f:
36
- for result in results:
37
- f.write(f"[{result['start']:.2f}s - {result['end']:.2f}s] {result['text']}\n")
38
- except OSError as e:
39
- print(f"Error writing transcription file: {e}")
40
- raise
41
- return output_file
42
- except FileNotFoundError as e:
43
- print(f"Error: {e}")
44
  raise
45
- except Exception as e:
46
- print(f"An unexpected error occurred: {e}")
47
- raise
 
2
  from faster_whisper import WhisperModel
3
 
4
  class TranscriptionMaker():
5
+ #書き起こしファイルを吐き出すディレクトリを指定
6
  def __init__(self,output_dir=os.path.abspath("/tmp/data/transcriptions")):
7
  self.model = WhisperModel("base", device="cpu")
8
  self.output_dir = output_dir
 
13
  print(f"Error creating directory {self.output_dir}: {e}")
14
  raise
15
 
16
+ #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
17
def create_transcription(self, audio_directory):
    """Transcribe every ``.wav`` file in a directory into one text file.

    Files are processed in sorted name order and the text of every segment
    is written, one segment per line, to
    ``<self.output_dir>/<directory name>_transcription.txt``.

    Args:
        audio_directory: Path of the directory holding the audio files.
            Entries whose extension is not ``.wav`` (case-insensitive) are
            ignored. A trailing path separator is tolerated.

    Returns:
        The path of the transcription file that was written.

    Raises:
        ValueError: If ``audio_directory`` is not an existing directory.
        OSError: If the transcription file cannot be written.
        Exception: Any error raised while transcribing a file is reported
            on stdout and re-raised unchanged.
    """
    if not os.path.isdir(audio_directory):
        raise ValueError(f"The specified path is not a valid directory: {audio_directory}")

    results = []
    # os.listdir() gives no ordering guarantee; sort so the combined
    # transcript is reproducible across runs and platforms.
    for audio_file in sorted(os.listdir(audio_directory)):
        if os.path.splitext(audio_file)[-1].lower() != '.wav':
            continue
        audio_path = os.path.join(audio_directory, audio_file)
        try:
            segments, _info = self.model.transcribe(audio_path)
            # faster-whisper yields segments lazily, so decoding errors only
            # surface while iterating; keep the loop inside the try so they
            # are reported together with the offending file.
            for segment in segments:
                results.append({
                    "start": segment.start,
                    "end": segment.end,
                    "text": segment.text
                })
        except Exception as e:
            print(f"Error transcripting file {audio_path}: {e}")
            raise

    # Name the output after the input directory. normpath() strips a trailing
    # separator, which would otherwise make basename() return "".
    directory_name = os.path.basename(os.path.normpath(audio_directory))
    output_file = os.path.join(self.output_dir, directory_name + "_transcription.txt")
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            # Only the text is persisted; start/end are collected but unused here.
            f.writelines(f"{result['text']}\n" for result in results)
    except OSError as e:
        print(f"Error writing transcription file: {e}")
        raise
    return output_file