pragnakalp committed on
Commit
5175f9d
·
1 Parent(s): 82bce9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -10
app.py CHANGED
@@ -42,16 +42,37 @@ def calculate(image_in, audio_in):
42
 
43
  def one_shot(image,input_text,gender):
44
  if gender == 'Female' or gender == 'female':
45
- tts = gTTS(input_text)
46
- with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
47
- tts.write_to_fp(f)
48
- f.seek(0)
49
- sound = AudioSegment.from_file(f.name, format="mp3")
50
- sound.export("/content/audio.wav", format="wav")
51
- audio_in="/content/audio.wav"
52
- video=calculate(image_in,audio_in)
53
- return video
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  def generate_ocr(method,image,gender):
 
42
 
43
  def one_shot(image,input_text,gender):
44
  if gender == 'Female' or gender == 'female':
45
+ tts = gTTS(input_text)
46
+ with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
47
+ tts.write_to_fp(f)
48
+ f.seek(0)
49
+ sound = AudioSegment.from_file(f.name, format="mp3")
50
+ sound.export("/content/audio.wav", format="wav")
51
+ return calculate(image,"/content/audio.wav")
52
+ elif gender == 'Male' or gender == 'male':
53
+ print(gender)
54
+ models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
55
+ "Voicemod/fastspeech2-en-male1",
56
+ arg_overrides={"vocoder": "hifigan", "fp16": False}
57
+ )
58
+
59
+ model = models[0].cuda()
60
+ TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
61
+ generator = task.build_generator([model], cfg)
62
+ # next(model.parameters()).device
63
+
64
+ sample = TTSHubInterface.get_model_input(task, input_text)
65
+ sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"].cuda()
66
+ sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"].cuda()
67
+ sample["speaker"] = sample["speaker"].cuda()
68
+
69
+ wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
70
+ # soundfile.write("/content/audio_before.wav", wav, rate)
71
+ soundfile.write("/content/audio_before.wav", wav.cpu().clone().numpy(), rate)
72
+ cmd='ffmpeg -i /content/audio_before.wav -filter:a "atempo=0.7" -vn /content/audio.wav'
73
+ os.system(cmd)
74
+ calculate(image,'audio.wav')
75
+
76
 
77
 
78
  def generate_ocr(method,image,gender):