pragnakalp commited on
Commit
bac325d
·
1 Parent(s): f93771f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -2
app.py CHANGED
@@ -23,10 +23,47 @@ import ffmpeg
23
 
24
  block = gr.Blocks()
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
def generate_ocr(method, image, gender):
    """Placeholder OCR handler.

    Ignores all three inputs and always answers with the fixed
    string ``"Hello"`` — the real OCR path is not implemented here.
    """
    greeting = "Hello"
    return greeting
31
 
32
  def run():
@@ -44,7 +81,7 @@ def run():
44
  with gr.Row().style(equal_height=True):
45
  btn = gr.Button("Generate")
46
 
47
- btn.click(generate_ocr, inputs=[image_in, input_text,gender], outputs=[video_out])
48
  # block.queue()
49
  block.launch(server_name="0.0.0.0", server_port=7860)
50
 
 
23
 
24
  block = gr.Blocks()
25
 
26
def one_shot(image, input_text, gender):
    """Synthesize speech for ``input_text`` with a gender-matched voice, then
    drive the one-shot talking-head generator with the resulting audio.

    Parameters
    ----------
    image : face image handed through to ``one_shot_talking``.
    input_text : text to convert to speech.
    gender : 'Female'/'female' selects gTTS; 'Male'/'male' selects the
        fairseq FastSpeech2 male voice. Any other value does nothing.

    NOTE(review): the original body began with a debug
    ``return input_text, gender`` followed by ``exit()``, which made the
    entire pipeline below unreachable dead code; that short-circuit has
    been removed so the function actually runs.
    """
    if gender in ('Female', 'female'):
        # Female voice: Google TTS -> temporary mp3 -> wav via pydub.
        tts = gTTS(input_text)
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
            tts.write_to_fp(f)
            f.seek(0)
            sound = AudioSegment.from_file(f.name, format="mp3")
        sound.export("/content/audio.wav", format="wav")
        # NOTE(review): audio is exported to /content/audio.wav but the
        # relative path 'audio.wav' is passed on — confirm the process
        # working directory is /content, otherwise this reads a stale file.
        one_shot_talking(image, 'audio.wav')

    elif gender in ('Male', 'male'):
        print(gender)
        # Male voice: fairseq FastSpeech2 checkpoint pulled from the HF hub,
        # vocoded with HiFi-GAN at full precision.
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "Voicemod/fastspeech2-en-male1",
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        model = models[0].cuda()
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        generator = task.build_generator([model], cfg)

        # Move every tensor in the sample onto the GPU alongside the model.
        sample = TTSHubInterface.get_model_input(task, input_text)
        sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"].cuda()
        sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"].cuda()
        sample["speaker"] = sample["speaker"].cuda()

        wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
        soundfile.write("/content/audio_before.wav", wav.cpu().clone().numpy(), rate)
        # Slow the synthesized speech to 0.7x tempo before animating.
        cmd = 'ffmpeg -i /content/audio_before.wav -filter:a "atempo=0.7" -vn /content/audio.wav'
        os.system(cmd)
        one_shot_talking(image, 'audio.wav')
64
 
65
 
66
def generate_ocr(method, image, gender):
    # Stub: OCR is not wired up in this build; all arguments are
    # accepted but unused, and the call always yields "Hello".
    return "Hello"
68
 
69
  def run():
 
81
  with gr.Row().style(equal_height=True):
82
  btn = gr.Button("Generate")
83
 
84
+ btn.click(one_shot, inputs=[image_in, input_text,gender], outputs=[video_out])
85
  # block.queue()
86
  block.launch(server_name="0.0.0.0", server_port=7860)
87