camparchimedes committed on
Commit
006db93
·
verified ·
1 Parent(s): 2177eb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -40,7 +40,7 @@ def transcribe_audio(audio_file):
40
  output = pipe(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
41
  return output["text"]
42
 
43
- # Gradio interface
44
  iface = gr.Interface(
45
  fn=transcribe_audio,
46
  inputs=gr.Audio(type="filepath"),
@@ -55,7 +55,7 @@ iface = gr.Interface(
55
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
56
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
57
 
58
- # Move summarization model to device
59
  summarization_model.to(device)
60
 
61
  def convert_to_wav(audio_file):
@@ -69,16 +69,15 @@ def convert_to_wav(audio_file):
69
 
70
 
71
 
72
- # Configure tokenizer to have distinct pad_token_id and eos_token_id
73
- if processor.tokenizer.pad_token_id is None:
74
- # Setting pad_token_id explicitly to ensure distinction from eos_token_id
75
- processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id + 1
76
 
77
- # Sanity check to confirm distinct pad and eos tokens
78
- assert processor.tokenizer.pad_token_id != processor.tokenizer.eos_token_id, \
79
- "pad_token_id and eos_token_id must be distinct!"
80
 
81
- # Proceed with the transcription function
82
  def transcribe_audio(audio_file, batch_size=4):
83
  start_time = time.time()
84
  if audio_file.endswith(".m4a"):
@@ -104,8 +103,9 @@ def transcribe_audio(audio_file, batch_size=4):
104
  task="transcribe",
105
  attention_mask=attention_mask,
106
  language="no",
107
- pad_token_id=processor.tokenizer.pad_token_id,
108
- eos_token_id=processor.tokenizer.eos_token_id
 
109
  )
110
 
111
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
@@ -144,7 +144,7 @@ def summarize_text(text):
144
  # HTML syntax for imagery
145
  image_html = """
146
  <div style="text-align: center;">
147
- <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
148
  </div>
149
  """
150
 
 
40
  output = pipe(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
41
  return output["text"]
42
 
43
+ # Gradio UI
44
  iface = gr.Interface(
45
  fn=transcribe_audio,
46
  inputs=gr.Audio(type="filepath"),
 
55
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
56
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
57
 
58
+ # t5-base to device
59
  summarization_model.to(device)
60
 
61
  def convert_to_wav(audio_file):
 
69
 
70
 
71
 
72
+ # Configuring a distinct pad_token_id this way is not good enough; disabled (commented out) below
73
+ #if processor.tokenizer.pad_token_id is None:
74
+ #processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id + 1
 
75
 
76
+ # Sanity check
77
+ #assert processor.tokenizer.pad_token_id != processor.tokenizer.eos_token_id, \
78
+ #"pad_token_id and eos_token_id must be distinct, and they are not"
79
 
80
+ # transcription
81
  def transcribe_audio(audio_file, batch_size=4):
82
  start_time = time.time()
83
  if audio_file.endswith(".m4a"):
 
103
  task="transcribe",
104
  attention_mask=attention_mask,
105
  language="no",
106
+ **encoded_input, pad_token_id=tokenizer.eos_token_id,
107
+ # pad_token_id=processor.tokenizer.pad_token_id,
108
+ # eos_token_id=processor.tokenizer.eos_token_id
109
  )
110
 
111
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
 
144
  # HTML syntax for imagery
145
  image_html = """
146
  <div style="text-align: center;">
147
+ <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture2.png" alt="Banner" width="85%" height="auto">
148
  </div>
149
  """
150