aka7774 committed
Commit f91092b · 1 Parent(s): 62e6f5b

Update app.py

Files changed (1)
  1. app.py +16 -3
app.py CHANGED
@@ -4,7 +4,13 @@ import whisper
 
 #model_size = 'aka7774/whisper-large-v3-ct2'
 model_size = 'large-v3'
-model = whisper.load_model(model_size)
+model = whisper.load_model(model_size, device="cpu")
+#_ = model.half()
+#_ = model.cuda()
+
+#for m in model.modules():
+#    if isinstance(m, whisper.model.LayerNorm):
+#        m.float()
 
 # model = WhisperModel(model_size, device="cuda", compute_type="float16")
 # or run on GPU with INT8
@@ -17,9 +23,16 @@ def speech_to_text(audio_file, _model_size):
     if model_size != _model_size:
         model_size = _model_size
         model = whisper.load_model(model_size)
-        #model = WhisperModel(model_size, compute_type="float16")
 
-    result = model.transcribe(audio_file)
+    with torch.no_grad():
+        result = model.transcribe(
+            audio_file,
+            verbose=True,
+            language='japanese',
+            beam_size=5,
+            #fp16=True,
+            without_timestamps=False
+        )
     #segments, info = model.transcribe(audio_file, beam_size=5)
 
     return result["text"]
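
For reference, a minimal runnable sketch of the relevant parts of app.py after this commit. The `import torch` line and the `global` declaration are assumptions: the new code calls `torch.no_grad()` and reassigns the module-level `model` and `model_size`, but neither line appears in the hunks above.

import torch
import whisper  # openai-whisper

#model_size = 'aka7774/whisper-large-v3-ct2'
model_size = 'large-v3'

# Load on CPU; the half-precision GPU path is kept around but disabled.
model = whisper.load_model(model_size, device="cpu")
#_ = model.half()
#_ = model.cuda()

# If the fp16 path were enabled, LayerNorm modules are commonly cast back
# to float32 for numerical stability, which is what this disabled loop does:
#for m in model.modules():
#    if isinstance(m, whisper.model.LayerNorm):
#        m.float()

def speech_to_text(audio_file, _model_size):
    global model, model_size  # assumed; the function reassigns both
    if model_size != _model_size:
        model_size = _model_size
        model = whisper.load_model(model_size)

    # Inference only: torch.no_grad() skips autograd bookkeeping.
    with torch.no_grad():
        result = model.transcribe(
            audio_file,
            verbose=True,          # print decoded segments as they complete
            language='japanese',   # skip language auto-detection
            beam_size=5,           # beam search instead of greedy decoding
            #fp16=True,
            without_timestamps=False,
        )
    return result["text"]

Loading in full precision on CPU sidesteps fp16 numerical issues on hardware without CUDA, and wrapping transcription in torch.no_grad() mainly trims memory use during decoding.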