Spaces:

Athspi
/

Gggg

Running

App Files Files Community

Athspi committed on Mar 11

Commit

cab5167

verified ·

1 Parent(s): 573a1cc

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import os
+import torch
+import numpy as np
+import gradio as gr
+import librosa
+import soundfile as sf
+from scipy.io import wavfile
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+from scipy import signal
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+class VoiceConverter:
+    def __init__(self):
+        # Load wav2vec model for audio feature extraction
+        self.processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+        self.model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to(device)
+        # Parameters for voice conversion
+        self.female_pitch_shift = 2.0  # Shift pitch up for female voice
+        self.female_formant_shift = 1.2  # Adjust formants for female voice
+    def extract_features(self, audio, sample_rate):
+        """Extract audio features using wav2vec2"""
+        # Resample if needed
+        if sample_rate != 16000:
+            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)
+            sample_rate = 16000
+        # Extract features
+        inputs = self.processor(audio, sampling_rate=sample_rate, return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        # Get hidden states (features)
+        hidden_states = outputs.hidden_states[-1].squeeze().cpu().numpy()
+        return hidden_states
+    def convert_to_female(self, audio_path, output_path):
+        """Convert voice from male to female"""
+        try:
+            # Load audio file
+            audio, sample_rate = librosa.load(audio_path, sr=None)
+            # Convert to mono if stereo
+            if len(audio.shape) > 1:
+                audio = librosa.to_mono(audio)
+            # Extract pitch using librosa
+            pitches, magnitudes = librosa.piptrack(y=audio, sr=sample_rate)
+            # Apply pitch shifting for female voice
+            audio_female = librosa.effects.pitch_shift(
+                audio,
+                sr=sample_rate,
+                n_steps=self.female_pitch_shift
+            )
+            # Apply formant shifting using a simple method - resample and scale back
+            y_stretched = librosa.effects.time_stretch(audio_female, rate=self.female_formant_shift)
+            audio_female_formant = librosa.resample(
+                y_stretched,
+                orig_sr=sample_rate,
+                target_sr=int(sample_rate * self.female_formant_shift)
+            )
+            audio_female_formant = librosa.resample(
+                audio_female_formant,
+                orig_sr=int(sample_rate * self.female_formant_shift),
+                target_sr=sample_rate
+            )
+            # Match the length with the original
+            min_len = min(len(audio), len(audio_female_formant))
+            audio_female_formant = audio_female_formant[:min_len]
+            # Save the result
+            sf.write(output_path, audio_female_formant, sample_rate)
+            return output_path
+        except Exception as e:
+            print(f"Error during conversion: {e}")
+            return None
+# Initialize voice converter once at import time; the constructor loads the
+# pretrained wav2vec2 processor and model, so this instance is shared by all requests.
+voice_converter = VoiceConverter()
+# Create Gradio interface
+def convert_voice(audio_file):
+    """Function to handle the Gradio interface"""
+    # Create a temporary file path for the output
+    input_filename = os.path.basename(audio_file)
+    output_filename = f"female_{input_filename}"
+    output_path = os.path.join(os.path.dirname(audio_file), output_filename)
+    # Perform voice conversion
+    result = voice_converter.convert_to_female(audio_file, output_path)
+    if result:
+        return result
+    else:
+        return None
+# Define the Gradio interface
+demo = gr.Interface(
+    fn=convert_voice,
+    inputs=gr.Audio(type="filepath", label="Upload Male Voice Audio"),
+    outputs=gr.Audio(label="Converted Female Voice"),
+    title="Voice Gender Conversion (Male to Female)",
+    description="Upload an audio file with a male voice to convert it to a female voice using AI.",
+    examples=[["sample1.wav"], ["sample2.wav"]],
+    theme=gr.themes.Soft()
+)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(share=True)