zongxiao committed on
Commit 5307e6b · 1 Parent(s): a2d9db4

Update app.py

Files changed (1)
app.py +8 -17
app.py CHANGED
@@ -1,44 +1,35 @@
 import torch
+import numpy as np
 from transformers import pipeline
+from transformers import BarkModel
+from transformers import AutoProcessor
 
 device="cpu"
+
 pipe = pipeline(
     "automatic-speech-recognition", model="openai/whisper-large-v2", device=device
 )
-
-def translate(audio):
-    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe","language":"chinese"})
-    return outputs["text"]
-
-from transformers import BarkModel
-from transformers import AutoProcessor
-model = BarkModel.from_pretrained("suno/bark-small")
 processor = AutoProcessor.from_pretrained("suno/bark")
-
+model = BarkModel.from_pretrained("suno/bark-small")
 model = model.to(device)
 synthesised_rate = model.generation_config.sample_rate
 
-
+def translate(audio):
+    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe","language":"chinese"})
+    return outputs["text"]
 def synthesise(text_prompt,voice_preset="v2/zh_speaker_1"):
     inputs = processor(text_prompt, voice_preset=voice_preset)
     speech_output = model.generate(**inputs.to(device),pad_token_id=10000)
-    #print(speech_output[0].cpu().numpy())
     return speech_output
-
-
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech
-
-import numpy as np
 def speech_to_speech_translation(audio,voice_preset="v2/zh_speaker_1"):
     translated_text = translate(audio)
-    #print(translated_text)
     synthesised_speech = synthesise(translated_text,voice_preset)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
-    #print(synthesised_speech)
     return synthesised_rate , synthesised_speech
 def speech_to_speech_translation_fix(audio,voice_preset="v2/zh_speaker_1"):
     synthesised_rate,synthesised_speech = speech_to_speech_translation(audio,voice_preset)
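
The hunk ends at the top of speech_to_speech_translation_fix, so the demo wiring of the Space is not visible in this diff. For orientation only, a minimal sketch of how the fixed function could be exposed as a Gradio app, assuming Gradio 4.x and assuming speech_to_speech_translation_fix returns a (sample_rate, int16 waveform) tuple like the function it wraps; none of the interface code below is part of this commit:

import gradio as gr

# Hypothetical wiring (not in this hunk): microphone-in / audio-out demo.
# type="filepath" hands the ASR pipeline a path it can read directly;
# type="numpy" on the output accepts the (sample_rate, np.int16 array)
# tuple that speech_to_speech_translation_fix is assumed to return.
demo = gr.Interface(
    fn=speech_to_speech_translation_fix,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=gr.Audio(label="Translated speech", type="numpy"),
    title="Speech-to-speech translation (Whisper + Bark)",
)

if __name__ == "__main__":
    demo.launch()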