mskov committed
Commit e560eb6 · 1 Parent(s): c03ca5d

Update app.py

Files changed (1)
  1. app.py +63 -16
app.py CHANGED
@@ -22,6 +22,7 @@ dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Aud
 
 print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
 
+
 def transcribe(audio):
     text = pipe(audio)["text"]
     return text
@@ -35,27 +36,73 @@ iface = gr.Interface(
 
 iface.launch()
 
-# Evaluate the model
-model.eval()
-print("model.eval ", model.eval())
-with torch.no_grad():
-    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-    print("outputs ", outputs)
-
-# Convert predicted token IDs back to text
-predicted_text = tokenizer.batch_decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
-
-# Get ground truth labels from the dataset
-labels = dataset["audio"] # Replace "labels" with the appropriate key in your dataset
-print("labels are ", labels)
-
-# Compute WER
-wer_score = wer(labels, predicted_text)
-
-# Print or return WER score
-print(f"Word Error Rate (WER): {wer_score}")
+def evalWhisper(model, dataset):
+    model.eval()
+    print("model.eval ", model.eval())
+
+    # Define a list to store the print statements
+    log_texts = []
+
+    with torch.no_grad():
+        outputs = model(**input_data) # Define input_data appropriately
+        print("outputs ", outputs)
+        log_texts.append(f"outputs: {outputs}")
+
+    # Convert predicted token IDs back to text
+    predicted_text = tokenizer.batch_decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
+
+    # Get ground truth labels from the dataset
+    labels = dataset["audio"] # Replace "labels" with the appropriate key in your dataset
+    print("labels are ", labels)
+    log_texts.append(f"labels: {labels}")
+
+    # Compute WER
+    wer_score = wer(labels, predicted_text) # Define wer function
+
+    # Print or return WER score
+    wer_message = f"Word Error Rate (WER): {wer_score}"
+    print(wer_message)
+    log_texts.append(wer_message)
+
+    print(log_texts)
+
+    return log_texts
+
+# Call evalWhisper and get the log texts
+log_texts = evalWhisper(model, dataset)
+
+# Display the log texts using gr.Interface
+log_text = "\n".join(log_texts)
+log_interface = gr.Interface(
+    fn=lambda: log_text,
+    inputs=None,
+    outputs="text",
+    title="EvalWhisper Log",
+)
+log_interface.launch()
+
+'''
+# Evaluate the model
+model.eval()
+print("model.eval ", model.eval())
+with torch.no_grad():
+    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+    print("outputs ", outputs)
+
+# Convert predicted token IDs back to text
+predicted_text = tokenizer.batch_decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
+
+# Get ground truth labels from the dataset
+labels = dataset["audio"] # Replace "labels" with the appropriate key in your dataset
+print("labels are ", labels)
+
+# Compute WER
+wer_score = wer(labels, predicted_text)
+
+# Print or return WER score
+print(f"Word Error Rate (WER): {wer_score}")
+'''
 '''
 print("check check")
 print(inputs)
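
A note on the added evalWhisper: it references names that app.py never defines (input_data, tokenizer, wer), dataset["audio"] yields audio arrays rather than reference transcripts, and taking the argmax of outputs.logits from a single forward pass is not valid decoding for a sequence-to-sequence model like Whisper, which decodes autoregressively. Below is a minimal sketch of the intended evaluation that reuses the pipe ASR pipeline defined earlier in app.py; the "text" column name and the jiwer dependency are assumptions, not part of this commit.

from jiwer import wer  # pip install jiwer; supplies the WER metric

def eval_whisper_wer(pipe, dataset, text_column="text"):
    # Sketch, not the committed code: transcribe every test example with the
    # existing pipeline and score word error rate against reference texts.
    references, hypotheses = [], []
    for example in dataset:
        # datasets' Audio feature yields {"array": ..., "sampling_rate": ...};
        # the transformers ASR pipeline accepts {"raw": ..., "sampling_rate": ...}.
        audio = example["audio"]
        pred = pipe({"raw": audio["array"], "sampling_rate": audio["sampling_rate"]})
        hypotheses.append(pred["text"])
        references.append(example[text_column])  # reference column name is assumed
    score = wer(references, hypotheses)
    print(f"Word Error Rate (WER): {score}")
    return score

Calling eval_whisper_wer(pipe, dataset) before iface.launch() would also sidestep an ordering problem in the commit: launch() normally blocks when run as a script, so the code placed after it, including the second log_interface.launch(), may never execute.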