Athspi committed on
Commit
cab5167
·
verified ·
1 Parent(s): 573a1cc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ import gradio as gr
5
+ import librosa
6
+ import soundfile as sf
7
+ from scipy.io import wavfile
8
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
9
+ from scipy import signal
10
+
11
# Choose the torch device once at import time: CUDA when available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
14
+
15
class VoiceConverter:
    """Male-to-female voice conversion built on librosa signal processing.

    A wav2vec2 processor/model pair is loaded for feature extraction
    (``extract_features``). The conversion itself (``convert_to_female``)
    uses only librosa pitch shifting plus a resampling-based spectral warp.
    """

    def __init__(self):
        # Load wav2vec model for audio feature extraction
        self.processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
        self.model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to(device)

        # Parameters for voice conversion
        self.female_pitch_shift = 2.0  # Passed to pitch_shift as n_steps
        self.female_formant_shift = 1.2  # Frequency-scaling factor for the spectral warp

    def extract_features(self, audio, sample_rate):
        """Extract audio features using wav2vec2.

        Args:
            audio: Waveform samples accepted by ``librosa.resample`` and the
                wav2vec2 processor.
            sample_rate: Sampling rate of ``audio`` in Hz; anything other
                than 16000 is resampled to 16000 first.

        Returns:
            The last hidden-state tensor of the model as a NumPy array with
            the batch axis removed.
        """
        # Resample if needed
        if sample_rate != 16000:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)
            sample_rate = 16000

        inputs = self.processor(audio, sampling_rate=sample_rate, return_tensors="pt").to(device)
        with torch.no_grad():
            # hidden_states is only populated when explicitly requested;
            # without this flag it is None and indexing it raises.
            outputs = self.model(**inputs, output_hidden_states=True)

        # Remove only the batch axis so a one-frame clip keeps its frame axis.
        return outputs.hidden_states[-1].squeeze(0).cpu().numpy()

    def _shift_formants(self, audio, sample_rate):
        """Scale the spectrum of ``audio`` by ``female_formant_shift``, keeping its duration.

        The signal is first slowed down by the factor (so it becomes that much
        longer) and then resampled back to the original length. Read at the
        unchanged ``sample_rate``, every frequency is multiplied by the
        factor. Note that this moves pitch together with the formants.
        """
        factor = self.female_formant_shift
        if factor == 1.0:
            return audio

        # rate < 1 lengthens the signal without changing its frequencies.
        stretched = librosa.effects.time_stretch(audio, rate=1.0 / factor)
        # Treat the longer signal as if sampled at sample_rate * factor and
        # bring it back to sample_rate: length returns to ~len(audio).
        return librosa.resample(
            stretched,
            orig_sr=int(round(sample_rate * factor)),
            target_sr=sample_rate,
        )

    def convert_to_female(self, audio_path, output_path):
        """Convert voice from male to female.

        Args:
            audio_path: Path of the audio file to read.
            output_path: Path the converted audio is written to; the
                container format is chosen by soundfile.

        Returns:
            ``output_path`` on success, or ``None`` if any step failed (the
            error is printed rather than raised).
        """
        try:
            # librosa.load downmixes to mono by default; spelled out here
            # because the processing below expects a 1-D signal.
            audio, sample_rate = librosa.load(audio_path, sr=None, mono=True)
            if audio.size == 0:
                raise ValueError("input audio is empty")

            # Apply pitch shifting for female voice
            audio_female = librosa.effects.pitch_shift(
                audio,
                sr=sample_rate,
                n_steps=self.female_pitch_shift,
            )

            # Apply the duration-preserving spectral warp
            audio_female_formant = self._shift_formants(audio_female, sample_rate)

            # Resampling can leave a few extra samples; never exceed the
            # original length.
            audio_female_formant = audio_female_formant[:len(audio)]

            # Save the result
            sf.write(output_path, audio_female_formant, sample_rate)
            return output_path

        except Exception as e:
            # Boundary for the UI callback: report and signal failure with None.
            print(f"Error during conversion: {e}")
            return None
85
+
86
+ # Create the module-level converter used by the Gradio callback; constructing it
+ # loads the wav2vec2 processor and model, so this runs once at import time.
87
+ voice_converter = VoiceConverter()
88
+
89
+ # Create Gradio interface
90
def convert_voice(audio_file):
    """Gradio callback: convert the uploaded recording and return the result path.

    Args:
        audio_file: Filesystem path of the uploaded audio, or ``None``/empty
            when the form was submitted without a recording.

    Returns:
        Path of the converted file, written next to the input as
        ``female_<input stem>.wav``, or ``None`` when there is no input or
        the conversion failed.
    """
    # Nothing was uploaded: there is no path to derive an output name from.
    if not audio_file:
        return None

    # Always write WAV. soundfile picks the container from the extension, and
    # an upload may carry an extension it cannot write.
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    output_path = os.path.join(os.path.dirname(audio_file), f"female_{stem}.wav")

    # convert_to_female returns the output path on success and None on failure.
    return voice_converter.convert_to_female(audio_file, output_path) or None
104
+
105
# Define the Gradio interface
# Only offer example clips that are actually present in the working directory,
# so a missing sample file cannot break construction of the interface.
_example_files = [
    [name] for name in ("sample1.wav", "sample2.wav") if os.path.exists(name)
]

demo = gr.Interface(
    fn=convert_voice,
    inputs=gr.Audio(type="filepath", label="Upload Male Voice Audio"),
    outputs=gr.Audio(label="Converted Female Voice"),
    title="Voice Gender Conversion (Male to Female)",
    description="Upload an audio file with a male voice to convert it to a female voice using AI.",
    examples=_example_files or None,  # None hides the examples section entirely
    theme=gr.themes.Soft(),
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=True)