camparchimedes committed · commit ca9b01d · verified · parent: c6bec82

Update app.py
Files changed (1)
  1. app.py +11 -17
app.py CHANGED
@@ -14,28 +14,22 @@ def convert_to_wav(audio_file):
     return wav_file
 
 import torch
-from transformers import AutoProcessor, pipeline
-
-
-# Initialize processor and pipeline
 
 # torch_dtype = torch.float32
+#MODEL_NAME = "NbAiLabBeta/nb-whisper-large"
 
-
-MODEL_NAME = "NbAiLabBeta/nb-whisper-large"
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
+#pipe = pipeline(
+#task="automatic-speech-recognition",
+#model=MODEL_NAME,
+#chunk_length_s=30,
+#device=device,
 )
 
+#pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="no", task="transcribe")
 
-pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="no", task="transcribe")
-
+from transformers import pipeline
 
+pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large")
 
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
@@ -56,7 +50,6 @@ def transcribe_audio(audio_file):
     return text, result
 
 
-# [VERSION 3: full-on w/ 3 styles for summarization]
 import nltk
 from nltk.tokenize import word_tokenize, sent_tokenize
 from nltk.corpus import stopwords
@@ -103,7 +96,8 @@ def preprocess_text(text):
 
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", return_dict=True, torch_dtype=torch.float16)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16)
 summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
 summarization_model.to(device)
 
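
For context, a minimal usage sketch of the code paths this commit touches, assuming the post-commit app.py: the simplified nb-whisper ASR pipeline plus the fp16 t5-base summarizer. The transcribe_and_summarize wrapper and the generation parameters (num_beams, max_length) are illustrative assumptions, not part of the diff.

# --- Usage sketch (not part of the commit); assumes the post-commit app.py ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# ASR pipeline as set up by this commit (model switched to NbAiLab/nb-whisper-large).
pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large")

# Summarizer as set up by this commit; the fp16 load effectively assumes a CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16)
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model.to(device)

def transcribe_and_summarize(wav_file):
    # Hypothetical helper, not in app.py: transcribe a .wav, then summarize the transcript.
    text = pipe(wav_file)["text"]
    # T5 expects a task prefix for summarization.
    inputs = summarization_tokenizer("summarize: " + text, return_tensors="pt",
                                     truncation=True, max_length=512).to(device)
    summary_ids = summarization_model.generate(**inputs, max_length=150, num_beams=4)
    summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return text, summary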