leenag committed
Commit 8332e59 · verified · 1 Parent(s): 7bc0f0f

Update app.py

Files changed (1)
  1. app.py +54 -54
app.py CHANGED
@@ -1,66 +1,66 @@
- # import gradio as gr

- # demo = gr.load("models/leenag/Malasar_Luke_Dict",
- #     examples = [["./luke1.1.wav", "transcribe"],
- #     ],
- #     title = "VRCLC Malasar Speech Recognition Demo",
- #     description=("Transcribe microphone or audio inputs with the click of a button!"),
- #     cache_examples=False
- # )

- # demo.launch()
- import gradio as gr
- import torch
- import soundfile as sf
- from transformers import pipeline

- device = "cuda:0" if torch.cuda.is_available() else "cpu"
- pipe = pipeline(
-     "automatic-speech-recognition",
-     model="vrclc/Malasar_medium_MTF",
-     chunk_length_s=10,
-     device=device,
- )

- def transcribe(audio):
-     """Transcribes Malasar speech from an audio file."""
-     try:
-         if audio is None:
-             return "Please record or upload an audio file."

-         print(f"[DEBUG] Received audio: {audio}")

-         # Handle filepath case from Gradio
-         audio_path = audio if isinstance(audio, str) else audio.get("name", None)
-         if audio_path is None:
-             return "Could not read audio file."

-         print(f"[DEBUG] Reading audio file: {audio_path}")
-         audio_data, sample_rate = sf.read(audio_path)

-         print(f"[DEBUG] Audio sample rate: {sample_rate}, shape: {audio_data.shape}")

-         transcription = pipe(
-             {"array": audio_data, "sampling_rate": sample_rate},
-             chunk_length_s=10,
-             batch_size=8,
-         )["text"]

-         print(f"[DEBUG] Transcription: {transcription}")
-         return transcription

-     except Exception as e:
-         import traceback
-         print("[ERROR] Exception during transcription:")
-         traceback.print_exc()
-         return f"Error: {str(e)}"

- iface = gr.Interface(
-     fn=transcribe,
-     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
-     outputs="text",
-     title="Malasar Speech Recognition",
-     description="Record or upload Malasar speech and submit to get the transcribed text.",
-     examples=[["luke1.1.wav"]],
- )
- iface.launch()
+ import gradio as gr

+ demo = gr.load("models/leenag/Malasar_Luke_Dict",
+     examples = [["./luke1.1.wav", "transcribe"],
+     ],
+     title = "VRCLC Malasar Speech Recognition Demo",
+     description=("Transcribe microphone or audio inputs with the click of a button!"),
+     cache_examples=False
+ )

+ demo.launch()
+ # import gradio as gr
+ # import torch
+ # import soundfile as sf
+ # from transformers import pipeline

+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ # pipe = pipeline(
+ #     "automatic-speech-recognition",
+ #     model="vrclc/Malasar_medium_MTF",
+ #     chunk_length_s=10,
+ #     device=device,
+ # )

+ # def transcribe(audio):
+ #     """Transcribes Malasar speech from an audio file."""
+ #     try:
+ #         if audio is None:
+ #             return "Please record or upload an audio file."

+ #         print(f"[DEBUG] Received audio: {audio}")

+ #         # Handle filepath case from Gradio
+ #         audio_path = audio if isinstance(audio, str) else audio.get("name", None)
+ #         if audio_path is None:
+ #             return "Could not read audio file."

+ #         print(f"[DEBUG] Reading audio file: {audio_path}")
+ #         audio_data, sample_rate = sf.read(audio_path)

+ #         print(f"[DEBUG] Audio sample rate: {sample_rate}, shape: {audio_data.shape}")

+ #         transcription = pipe(
+ #             {"array": audio_data, "sampling_rate": sample_rate},
+ #             chunk_length_s=10,
+ #             batch_size=8,
+ #         )["text"]

+ #         print(f"[DEBUG] Transcription: {transcription}")
+ #         return transcription

+ #     except Exception as e:
+ #         import traceback
+ #         print("[ERROR] Exception during transcription:")
+ #         traceback.print_exc()
+ #         return f"Error: {str(e)}"

+ # iface = gr.Interface(
+ #     fn=transcribe,
+ #     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+ #     outputs="text",
+ #     title="Malasar Speech Recognition",
+ #     description="Record or upload Malasar speech and submit to get the transcribed text.",
+ #     examples=[["luke1.1.wav"]],
+ # )
+ # iface.launch()
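
For quick local verification, the pipeline-based path that this commit comments out can still be exercised outside Gradio with a short script along these lines (a sketch, assuming torch, soundfile, and transformers are installed, the vrclc/Malasar_medium_MTF checkpoint is reachable on the Hub, and luke1.1.wav is a local recording):

```python
import torch
import soundfile as sf
from transformers import pipeline

# Sketch of the commented-out ASR path, run outside the Gradio app.
# Assumes the vrclc/Malasar_medium_MTF checkpoint can be downloaded and
# that luke1.1.wav is a local Malasar recording.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
asr = pipeline(
    "automatic-speech-recognition",
    model="vrclc/Malasar_medium_MTF",
    chunk_length_s=10,
    device=device,
)

audio_data, sample_rate = sf.read("luke1.1.wav")
print(asr({"array": audio_data, "sampling_rate": sample_rate})["text"])
```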