Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import gradio as gr | |
| from transformers import pipeline | |
| from pydub import AudioSegment | |
| import os | |
| import speech_recognition as sr | |
# Load the three speech-recognition pipelines once, at import time, so that
# requests reuse them. `predict_fa` looks these objects up by their global
# names (`model0`, `model`, `model1`), so the names must stay as they are.

# Multilingual XLSR checkpoint.
model_name = "voidful/wav2vec2-xlsr-multilingual-56"
model0 = pipeline(
    task="automatic-speech-recognition",
    model=model_name,
)

# Sharif wav2vec2 checkpoint.
model_name = "SLPL/Sharif-wav2vec2"
model = pipeline(
    task="automatic-speech-recognition",
    model=model_name,
)

# common8 checkpoint.
model_name = "ghofrani/common8"
model1 = pipeline(
    task="automatic-speech-recognition",
    model=model_name,
)
| import json | |
def predict_fa(speech, model):
    """Transcribe an audio file with the selected Persian ASR pipeline.

    Args:
        speech: Path of the audio file to transcribe.
        model: Identifier of the checkpoint to use; one of
            "SLPL/Sharif-wav2vec2", "ghofrani/common8" or
            "voidful/wav2vec2-xlsr-multilingual-56".

    Returns:
        A two-item list: the plain transcript, and the full pipeline result
        (including word-level timestamps) serialised as a JSON string.

    Raises:
        ValueError: If no audio was supplied or ``model`` is not one of the
            known identifiers.
    """
    if not speech:
        raise ValueError("No audio file was provided.")

    # The ``model`` parameter shadows the module-level ``model`` pipeline, so
    # the Sharif pipeline has to be fetched through ``globals()``; calling
    # ``model(...)`` directly would try to call the identifier string.
    pipelines = {
        "SLPL/Sharif-wav2vec2": globals()["model"],
        "ghofrani/common8": model1,
        "voidful/wav2vec2-xlsr-multilingual-56": model0,
    }
    try:
        recognizer = pipelines[model]
    except (KeyError, TypeError):
        # TypeError covers unhashable values; KeyError covers None/unknown ids.
        raise ValueError(
            f"Unknown model {model!r}; choose one of: {', '.join(pipelines)}"
        ) from None

    text = recognizer(speech, return_timestamps="word")
    # ensure_ascii=False keeps Persian text readable instead of \uXXXX escapes.
    return [text["text"], json.dumps(text, ensure_ascii=False)]
def convert_to_wav(filename):
    """Re-encode an audio file as WAV next to the original and return its path.

    The source format is taken from the file extension. If the target
    ``<stem>.wav`` already exists, ``(1)`` is appended to the stem repeatedly
    until an unused name is found, so existing files are never overwritten.
    """
    stem, extension = os.path.splitext(filename)
    source_format = extension.replace(".", "")
    segment = AudioSegment.from_file(filename, format=source_format)

    new_filename = f"{stem}.wav"
    while os.path.exists(new_filename):
        current_stem = os.path.splitext(new_filename)[0]
        new_filename = f"{current_stem}(1).wav"

    segment.export(new_filename, format="wav")
    print(f"Converting {filename} to {new_filename}...")
    return new_filename
def g_rec(audio_File, language):
    """Transcribe an audio file with the Google recogniser of speech_recognition.

    Files whose extension is not exactly ``.wav`` are converted with
    ``convert_to_wav`` first. The result is always a string: ``"Text: ..."``
    on success, or ``"Exception: ..."`` carrying the error message when
    recognition fails.
    """
    recognizer = sr.Recognizer()

    _, extension = os.path.splitext(audio_File)
    if extension != ".wav":
        audio_File = convert_to_wav(audio_File)

    with sr.AudioFile(audio_File) as source:
        recording = recognizer.record(source)

    # Recognition errors are reported in the output text rather than raised.
    try:
        return "Text: " + recognizer.recognize_google(recording, language=language)
    except Exception as e:
        return "Exception: " + str(e)
| # Export file as .wav | |
| #predict(load_file_to_data('audio file path',sampling_rate=16_000)) # beware of the audio file sampling rate | |
| #predict_lang_specific(load_file_to_data('audio file path',sampling_rate=16_000),'en') # beware of the audio file sampling rate | |
# Gradio UI with two tabs: local Persian wav2vec2 models (handled by
# `predict_fa`) and the Google recogniser (handled by `g_rec`).
with gr.Blocks() as demo:
    gr.Markdown("multilingual Speech Recognition")

    with gr.Tab("Persian models"):
        inputs_speech_fa = gr.Audio(source="upload", type="filepath", optional=True, label="Upload your audio:")
        # gr.Radio replaces the deprecated gr.inputs.Radio, matching the other
        # components in this block.
        inputs_model_fa = gr.Radio(label="Language", choices=["ghofrani/common8", "SLPL/Sharif-wav2vec2", "voidful/wav2vec2-xlsr-multilingual-56"])
        output_transcribe1_fa = gr.Textbox(label="Transcribed text:")
        output_transcribe1_fa1 = gr.Textbox(label="Transcribed text with timestamps:")
        transcribe_audio1_fa = gr.Button("Submit")

    with gr.Tab("google"):
        gr.Markdown("set your speech language")
        # NOTE(review): "zh (cmn-Hans-CN)" and "zh-TW (cmn-Hant-TW)" do not look
        # like plain language tags; confirm the recogniser accepts them as-is.
        inputs_speech1 = [
            gr.Audio(source="upload", type="filepath"),
            gr.Dropdown(
                choices=["af-ZA","am-ET","ar-AE","ar-BH","ar-DZ","ar-EG","ar-IL","ar-IQ","ar-JO","ar-KW","ar-LB","ar-MA","ar-MR","ar-OM","ar-PS","ar-QA","ar-SA","ar-TN","ar-YE","az-AZ","bg-BG","bn-BD","bn-IN","bs-BA","ca-ES","cs-CZ","da-DK","de-AT","de-CH","de-DE","el-GR","en-AU","en-CA","en-GB","en-GH","en-HK","en-IE","en-IN","en-KE","en-NG","en-NZ","en-PH","en-PK","en-SG","en-TZ","en-US","en-ZA","es-AR","es-BO","es-CL","es-CO","es-CR","es-DO","es-EC","es-ES","es-GT","es-HN","es-MX","es-NI","es-PA","es-PE","es-PR","es-PY","es-SV","es-US","es-UY","es-VE","et-EE","eu-ES","fa-IR","fi-FI","fil-PH","fr-BE","fr-CA","fr-CH","fr-FR","gl-ES","gu-IN","hi-IN","hr-HR","hu-HU","hy-AM","id-ID","is-IS","it-CH","it-IT","iw-IL","ja-JP","jv-ID","ka-GE","kk-KZ","km-KH","kn-IN","ko-KR","lo-LA","lt-LT","lv-LV","mk-MK","ml-IN","mn-MN","mr-IN","ms-MY","my-MM","ne-NP","nl-BE","nl-NL","no-NO","pa-Guru-IN","pl-PL","pt-BR","pt-PT","ro-RO","ru-RU","si-LK","sk-SK","sl-SI","sq-AL","sr-RS","su-ID","sv-SE","sw-KE","sw-TZ","ta-IN","ta-LK","ta-MY","ta-SG","te-IN","th-TH","tr-TR","uk-UA","ur-IN","ur-PK","uz-UZ","vi-VN","yue-Hant-HK","zh (cmn-Hans-CN)","zh-TW (cmn-Hant-TW)","zu-ZA"],
                value="fa-IR",
                label="language code",
            ),
        ]
        output_transcribe1 = gr.Textbox(label="output")
        transcribe_audio1 = gr.Button("Submit")

    # Event wiring must happen inside the Blocks context.
    transcribe_audio1_fa.click(
        fn=predict_fa,
        inputs=[inputs_speech_fa, inputs_model_fa],
        outputs=[output_transcribe1_fa, output_transcribe1_fa1],
    )
    # The Google tab's button is `transcribe_audio1`; the previous reference
    # to the undefined `transcribe_audio1_go` raised NameError at startup.
    transcribe_audio1.click(
        fn=g_rec,
        inputs=inputs_speech1,
        outputs=output_transcribe1,
    )

if __name__ == "__main__":
    demo.launch()