tzu committed on
Commit
1041028
·
1 Parent(s): 692c174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -4,32 +4,22 @@ import transformers
4
  # predictions = pipeline(image)
5
  # return {p["label"]: p["score"] for p in predictions}
6
 
7
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
8
  from datasets import load_dataset
9
  import torch
10
-
 
11
  def predict(speech):
12
  # load model and tokenizer
13
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
14
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
15
-
16
- #pipeline = pipeline(task="speech-classification", model="facebook/wav2vec2-base-960h")
17
-
18
- # load dummy dataset and read soundfiles
19
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
20
-
21
- # tokenize
22
- input_values = processor(ds[0]["audio"]["array"], return_tensors="pt", padding="longest").input_values # Batch size 1
23
-
24
- # retrieve logits
25
- logits = model(input_values).logits
26
-
27
- # take argmax and decode
28
- predicted_ids = torch.argmax(logits, dim=-1)
29
- transcription = processor.batch_decode(predicted_ids)
30
- return transcription
31
 
32
- demo = gr.Interface(fn=predict outputs="label")
33
 
34
  demo.launch()
35
 
 
4
  # predictions = pipeline(image)
5
  # return {p["label"]: p["score"] for p in predictions}
6
 
 
7
  from datasets import load_dataset
8
  import torch
9
+ from transformers import pipeline
10
+
11
  def predict(speech):
12
  # load model and tokenizer
13
+ torch.manual_seed(42)
14
+ ds = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
15
+ audio_file = ds[0]["audio"]["path"]
16
+ audio_classifier = pipeline(
17
+ task="audio-classification", model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
18
+ preds = audio_classifier(audio_file)
19
+ preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
20
+ return preds
 
 
 
 
 
 
 
 
 
 
21
 
22
+ demo = gr.Interface(fn=predict, inputs='texts' outputs="texts")
23
 
24
  demo.launch()
25