Update app.py
app.py CHANGED
@@ -9,40 +9,12 @@ from transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 # from next_word_prediction import GPT2
 
-### code
+### code snippet
 gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
-
+### /code snippet
 
-generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True)
-
-# only use id's that were generated
-# gen_sequences has shape [3, 15]
-gen_sequences = generated_outputs.sequences[:, input_ids.shape[-1]:]
-
-# let's stack the logits generated at each step to a tensor and transform
-# logits to probs
-probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1) # -> shape [3, 15, vocab_size]
-
-# now we need to collect the probability of the generated token
-# we need to add a dummy dim in the end to make gather work
-gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
-
-# now we can do all kinds of things with the probs
-
-# 1) the probs that exactly those sequences are generated again
-# those are normally going to be very small
-unique_prob_per_sequence = gen_probs.prod(-1)
-
-# 2) normalize the probs over the three sequences
-normed_gen_probs = gen_probs / gen_probs.sum(0)
-assert normed_gen_probs[:, 0].sum() == 1.0, "probs should be normalized"
-
-# 3) compare normalized probs to each other like in 1)
-unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)
-
-### end code
 from share_btn import community_icon_html, loading_icon_html, share_js
 
 # get gpt2 model
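For context, a minimal sketch (not part of the commit) of what the two flags used above return. With `return_dict_in_generate=True`, `generate` returns an output object instead of a bare tensor; with `output_scores=True` it also carries a tuple `scores` holding one logits tensor per generated step. The prompt string and `max_new_tokens` value are placeholders:

```python
# Sketch only: inspect the structure generate() returns with these flags.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)

input_ids = tokenizer("Hello there", return_tensors="pt").input_ids  # placeholder prompt
out = gpt2.generate(input_ids, do_sample=True, max_new_tokens=5,
                    num_return_sequences=3, output_scores=True,
                    pad_token_id=tokenizer.eos_token_id)  # GPT-2 has no pad token

print(out.sequences.shape)                   # [3, prompt_len + 5]: prompt + sampled ids
print(len(out.scores), out.scores[0].shape)  # 5 steps, each [3, vocab_size]
```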
@@ -66,14 +38,45 @@ def inference(audio):
 _, probs = model.detect_language(mel)
 
 # decode audio data
-options = whisper.DecodingOptions(fp16 =
+options = whisper.DecodingOptions(fp16 = True)
 # transcribe speech to text
 result = whisper.decode(model, mel, options)
 
+### code
+input_ids = tokenizer(result.text, return_tensors="pt").input_ids
+
+generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True)
+
+# only use ids that were generated
+# gen_sequences has shape [3, 15]
+gen_sequences = generated_outputs.sequences[:, input_ids.shape[-1]:]
+
+# let's stack the logits generated at each step into a tensor and transform
+# logits to probs
+probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1) # -> shape [3, 15, vocab_size]
+
+# now we need to collect the probability of each generated token;
+# we add a dummy dim at the end to make gather work
+gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
+
+# now we can do all kinds of things with the probs
+
+# 1) the probability that exactly those sequences are generated again
+# those are normally going to be very small
+# unique_prob_per_sequence = gen_probs.prod(-1)
+
+# 2) normalize the probs over the three sequences
+# normed_gen_probs = gen_probs / gen_probs.sum(0)
+# assert normed_gen_probs[:, 0].sum() == 1.0, "probs should be normalized"
+
+# 3) compare normalized probs to each other like in 1)
+# unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)
+
+### end code
 # print audio data as text
 # print(result.text)
 getText = generator(result.text, max_length=3, num_return_sequences=5)
-
+pprint((getText, gen_probs))
 return getText, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
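The `fp16 = True` decoding option assumes GPU execution; on CPU, openai-whisper warns and falls back to FP32. A minimal standalone sketch of the decode path mirrored from `inference` above, with the model size and audio file as placeholder assumptions:

```python
# Sketch only: the whisper transcription flow used by inference().
import whisper

model = whisper.load_model("base")                             # model size is an assumption
audio = whisper.pad_or_trim(whisper.load_audio("sample.wav"))  # placeholder audio file
mel = whisper.log_mel_spectrogram(audio).to(model.device)

_, probs = model.detect_language(mel)
print(f"detected language: {max(probs, key=probs.get)}")

options = whisper.DecodingOptions(fp16=True)  # requires CUDA; use fp16=False on CPU
result = whisper.decode(model, mel, options)
print(result.text)
```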
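Finally, a self-contained sketch of the scoring block this commit moves into `inference`, runnable without the audio pipeline. The prompt stands in for the Whisper transcription, and the exact float equality from the original assert is replaced by `torch.allclose`, since `softmax` outputs rarely sum to exactly 1.0:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)

input_ids = tokenizer("The weather today is", return_tensors="pt").input_ids  # placeholder
outputs = gpt2.generate(input_ids, do_sample=True, max_new_tokens=15,
                        num_return_sequences=3, output_scores=True,
                        pad_token_id=tokenizer.eos_token_id)

# keep only the newly generated ids -> shape [3, 15]
gen_sequences = outputs.sequences[:, input_ids.shape[-1]:]

# stack the per-step logits into one tensor and turn them into probs -> [3, 15, vocab_size]
probs = torch.stack(outputs.scores, dim=1).softmax(-1)

# gather the probability of each sampled token; the dummy trailing dim makes gather work -> [3, 15]
gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)

# 1) probability of sampling exactly those sequences again (normally very small)
unique_prob_per_sequence = gen_probs.prod(-1)

# 2) normalize the probs over the three sequences
normed_gen_probs = gen_probs / gen_probs.sum(0)
assert torch.allclose(normed_gen_probs[:, 0].sum(), torch.tensor(1.0)), "probs should be normalized"

# 3) compare normalized probs to each other like in 1)
unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)
print(unique_prob_per_sequence, unique_normed_prob_per_sequence)
```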