Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		jason-on-salt-a40
		
	committed on
		
		
					Commit 
							
							·
						
						b1f4e2f
	
1
								Parent(s):
							
							579d79b
								
fix space error. fix encodec download path
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -63,7 +63,7 @@ class WhisperModel: 
     | 
|
| 63 | 
         
             
                def transcribe(self, audio_path):
         
     | 
| 64 | 
         
             
                    return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
         
     | 
| 65 | 
         | 
| 66 | 
         
            -
             
     | 
| 67 | 
         
             
            class WhisperxModel:
         
     | 
| 68 | 
         
             
                def __init__(self, model_name, align_model: WhisperxAlignModel):
         
     | 
| 69 | 
         
             
                    from whisperx import load_model
         
     | 
| 
         @@ -100,7 +100,7 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, 
     | 
|
| 100 | 
         | 
| 101 | 
         
             
                encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
         
     | 
| 102 | 
         
             
                if not os.path.exists(encodec_fn):
         
     | 
| 103 | 
         
            -
                    os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th")
         
     | 
| 104 | 
         | 
| 105 | 
         
             
                voicecraft_model = {
         
     | 
| 106 | 
         
             
                    "config": config,
         
     | 
| 
         @@ -114,9 +114,11 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, 
     | 
|
| 114 | 
         | 
| 115 | 
         
             
            def get_transcribe_state(segments):
         
     | 
| 116 | 
         
             
                words_info = [word_info for segment in segments for word_info in segment["words"]]
         
     | 
| 
         | 
|
| 
         | 
|
| 117 | 
         
             
                return {
         
     | 
| 118 | 
         
             
                    "segments": segments,
         
     | 
| 119 | 
         
            -
                    "transcript":  
     | 
| 120 | 
         
             
                    "words_info": words_info,
         
     | 
| 121 | 
         
             
                    "transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
         
     | 
| 122 | 
         
             
                    "transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
         
     | 
| 
         @@ -140,7 +142,7 @@ def transcribe(seed, audio_path): 
     | 
|
| 140 | 
         
             
                    state
         
     | 
| 141 | 
         
             
                ]
         
     | 
| 142 | 
         | 
| 143 | 
         
            -
             
     | 
| 144 | 
         
             
            def align_segments(transcript, audio_path):
         
     | 
| 145 | 
         
             
                from aeneas.executetask import ExecuteTask
         
     | 
| 146 | 
         
             
                from aeneas.task import Task
         
     | 
| 
         @@ -363,7 +365,7 @@ If disabled, you should write the target transcript yourself:</br> 
     | 
|
| 363 | 
         
             
             - In Edit mode write full prompt</br>
         
     | 
| 364 | 
         
             
            """
         
     | 
| 365 | 
         | 
| 366 | 
         
            -
            demo_original_transcript = " 
     | 
| 367 | 
         | 
| 368 | 
         
             
            demo_text = {
         
     | 
| 369 | 
         
             
                "TTS": {
         
     | 
| 
         @@ -603,6 +605,7 @@ if __name__ == "__main__": 
     | 
|
| 603 | 
         
             
                parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
         
     | 
| 604 | 
         
             
                parser.add_argument("--port", default=7860, type=int, help="App port")
         
     | 
| 605 | 
         
             
                parser.add_argument("--share", action="store_true", help="Launch with public url")
         
     | 
| 
         | 
|
| 606 | 
         | 
| 607 | 
         
             
                os.environ["USER"] = os.getenv("USER", "user")
         
     | 
| 608 | 
         
             
                args = parser.parse_args()
         
     | 
| 
         @@ -611,4 +614,4 @@ if __name__ == "__main__": 
     | 
|
| 611 | 
         
             
                MODELS_PATH = args.models_path
         
     | 
| 612 | 
         | 
| 613 | 
         
             
                app = get_app()
         
     | 
| 614 | 
         
            -
                app.queue().launch(share=args.share, server_port=args.port)
         
     | 
| 
         | 
|
| 63 | 
         
             
                def transcribe(self, audio_path):
         
     | 
| 64 | 
         
             
                    return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
         
     | 
| 65 | 
         | 
| 66 | 
         
            +
             
     | 
| 67 | 
         
             
            class WhisperxModel:
         
     | 
| 68 | 
         
             
                def __init__(self, model_name, align_model: WhisperxAlignModel):
         
     | 
| 69 | 
         
             
                    from whisperx import load_model
         
     | 
| 
         | 
|
| 100 | 
         | 
| 101 | 
         
             
                encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
         
     | 
| 102 | 
         
             
                if not os.path.exists(encodec_fn):
         
     | 
| 103 | 
         
            +
                    os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th -O " + encodec_fn)
         
     | 
| 104 | 
         | 
| 105 | 
         
             
                voicecraft_model = {
         
     | 
| 106 | 
         
             
                    "config": config,
         
     | 
| 
         | 
|
| 114 | 
         | 
| 115 | 
         
             
            def get_transcribe_state(segments):
         
     | 
| 116 | 
         
             
                words_info = [word_info for segment in segments for word_info in segment["words"]]
         
     | 
| 117 | 
         
            +
                transcript = " ".join([segment["text"] for segment in segments])
         
     | 
| 118 | 
         
            +
                transcript = transcript[1:] if transcript[0] == " " else transcript
         
     | 
| 119 | 
         
             
                return {
         
     | 
| 120 | 
         
             
                    "segments": segments,
         
     | 
| 121 | 
         
            +
                    "transcript": transcript,
         
     | 
| 122 | 
         
             
                    "words_info": words_info,
         
     | 
| 123 | 
         
             
                    "transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
         
     | 
| 124 | 
         
             
                    "transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
         
     | 
| 
         | 
|
| 142 | 
         
             
                    state
         
     | 
| 143 | 
         
             
                ]
         
     | 
| 144 | 
         | 
| 145 | 
         
            +
            @spaces.GPU(duration=60)
         
     | 
| 146 | 
         
             
            def align_segments(transcript, audio_path):
         
     | 
| 147 | 
         
             
                from aeneas.executetask import ExecuteTask
         
     | 
| 148 | 
         
             
                from aeneas.task import Task
         
     | 
| 
         | 
|
| 365 | 
         
             
             - In Edit mode write full prompt</br>
         
     | 
| 366 | 
         
             
            """
         
     | 
| 367 | 
         | 
| 368 | 
         
            +
            demo_original_transcript = "But when I had approached so near to them, the common object, which the sense deceives, lost not by distance any of its marks."
         
     | 
| 369 | 
         | 
| 370 | 
         
             
            demo_text = {
         
     | 
| 371 | 
         
             
                "TTS": {
         
     | 
| 
         | 
|
| 605 | 
         
             
                parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
         
     | 
| 606 | 
         
             
                parser.add_argument("--port", default=7860, type=int, help="App port")
         
     | 
| 607 | 
         
             
                parser.add_argument("--share", action="store_true", help="Launch with public url")
         
     | 
| 608 | 
         
            +
                parser.add_argument("--server_name", default="127.0.0.1", type=str, help="Server name for launching the app. 127.0.0.1 for localhost; 0.0.0.0 to allow access from other machines in the local network. Might also give access to external users depends on the firewall settings.")
         
     | 
| 609 | 
         | 
| 610 | 
         
             
                os.environ["USER"] = os.getenv("USER", "user")
         
     | 
| 611 | 
         
             
                args = parser.parse_args()
         
     | 
| 
         | 
|
| 614 | 
         
             
                MODELS_PATH = args.models_path
         
     | 
| 615 | 
         | 
| 616 | 
         
             
                app = get_app()
         
     | 
| 617 | 
         
            +
                app.queue().launch(share=args.share, server_name=args.server_name, server_port=args.port)
         
     |