Commit a440325
1 Parent(s): 13679ee

whisper audio question input

Files changed:
- .gitignore +1 -0
- app.py +31 -7
- examples/sample1_en.mp3 +0 -0
- examples/sample2_jp.mp3 +0 -0
- examples/sample3_gr.mp3 +0 -0
- examples/sample4_sp.mp3 +0 -0
- requirements.txt +2 -1
.gitignore CHANGED
@@ -1,3 +1,4 @@
 .idea
 .env
 /venv/
+/rough.py
app.py CHANGED
@@ -14,6 +14,8 @@ from monai.transforms import (
     Orientationd,
     EnsureChannelFirstd,
 )
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import librosa
 import torch
 
 title = 'Detect and Segment Brain Tumors 🧠'
@@ -36,6 +38,9 @@ preproc_transforms = Compose(
 )
 post_trans = Compose([Activations(sigmoid=True), AsDiscrete(threshold=0.5)])
 
+processor_whisper = WhisperProcessor.from_pretrained("whisper-tiny")
+model_whisper = WhisperForConditionalGeneration.from_pretrained("whisper-tiny")
+
 model_tumor_seg = SegResNet(
     blocks_down=[1, 2, 2, 4],
     blocks_up=[1, 1, 1],
@@ -62,13 +67,31 @@ def inference(input):
 
 
 examples = [
-    ['examples/BRATS_225.nii.gz', 83, 2],
-    ['examples/BRATS_485.nii.gz', 90, 1],
-    ['examples/BRATS_485.nii.gz', 110, 0]
+    ['examples/BRATS_225.nii.gz', 83, 2, 'english', 'examples/sample1_en.mp3'],
+    ['examples/BRATS_485.nii.gz', 90, 1, 'japanese', 'examples/sample2_jp.mp3'],
+    ['examples/BRATS_485.nii.gz', 110, 0, 'german', 'examples/sample3_gr.mp3'],
 ]
 
 
-def detector(tumor_file, slice_number, channel, audio_question):
+def process_audio(sampling_rate, waveform):
+    waveform = waveform / 32678.0
+    if len(waveform.shape) > 1:
+        waveform = librosa.to_mono(waveform.T)
+    if sampling_rate != 16000:
+        waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=16000)
+    waveform = waveform[:16000 * 30]
+    waveform = torch.tensor(waveform)
+    return waveform
+
+
+def detector(tumor_file, slice_number, channel, language, audio_question):
+    sampling_rate, waveform = audio_question
+    forced_decoder_ids = processor_whisper.get_decoder_prompt_ids(language=language, task="transcribe")
+    waveform = process_audio(sampling_rate, waveform)
+    audio_inputs = processor_whisper(audio=waveform, sampling_rate=16000, return_tensors="pt")
+    predicted_ids = model_whisper.generate(**audio_inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
+    transcription = processor_whisper.batch_decode(predicted_ids, skip_special_tokens=True)
+    output_text = transcription[0]
     tumor_file_path = tumor_file.name
     processed_data = preproc_transforms({'image': [tumor_file_path]})
     tensor_3d_input = processed_data['image'].unsqueeze(0).to('cpu')
@@ -87,18 +110,19 @@ def detector(tumor_file, slice_number, channel, audio_question):
     plt.savefig(output_image_path, bbox_inches='tight', pad_inches=0)
     segment_image = np.asarray(Image.open(output_image_path))
     os.remove(output_image_path)
-    return (channel_image, segment_image,
+    return (channel_image, segment_image, output_text)
 
 
 interface = gr.Interface(fn=detector, inputs=[gr.File(label="Tumor File"),
                                               gr.Slider(0, 200, 50, step=1, label="Slice Number"),
-                                              gr.Radio(
+                                              gr.Radio((0, 1, 2), label="Channel"),
+                                              gr.Radio(("english", "japanese", "german", "spanish"), label="Language"),
                                               gr.Audio(source="microphone"), ],
                          outputs=[gr.Image(label='channel', shape=(1, 1)),
                                   gr.Image(label='Segmented Tumor', shape=(1, 1)),
                                   gr.Textbox(label="Medical Summary")], title=title,
                          examples=examples,
-                         description=description,
+                         description=description, theme='dark')
 
 theme = gr.themes.Default().set(
     button_primary_background_fill="#FF0000",
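For reference, the transcription path this commit wires into detector() can be exercised outside the Space. The sketch below mirrors process_audio plus the Whisper calls, under a few assumptions that are not taken from the commit: the hub id "openai/whisper-tiny" (the commit loads "whisper-tiny", which only resolves if such a local folder exists), the helper names prepare_waveform and transcribe_question, and normalization by 32768.0, the usual int16 full scale (the commit divides by 32678.0, which looks like a transposition of that value).

import librosa
import numpy as np
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Assumption: the canonical hub id for the tiny Whisper checkpoint.
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")


def prepare_waveform(sampling_rate, waveform):
    # Microphone input arrives as int16; scale to [-1, 1].
    # (Assumption: 32768.0 full scale; the commit uses 32678.0.)
    waveform = waveform.astype(np.float32) / 32768.0
    if waveform.ndim > 1:                      # (samples, channels) -> mono
        waveform = librosa.to_mono(waveform.T)
    if sampling_rate != 16000:                 # Whisper expects 16 kHz input
        waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=16000)
    # Keep at most 30 s; the feature extractor accepts the NumPy array
    # directly, so no torch.tensor conversion is needed here.
    return waveform[:16000 * 30]


def transcribe_question(sampling_rate, waveform, language="english"):
    # Force the decoder to transcribe in the language chosen in the UI.
    forced_ids = processor.get_decoder_prompt_ids(language=language, task="transcribe")
    inputs = processor(audio=prepare_waveform(sampling_rate, waveform),
                       sampling_rate=16000, return_tensors="pt")
    predicted_ids = model.generate(**inputs, max_length=400,
                                   forced_decoder_ids=forced_ids)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]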
examples/sample1_en.mp3 ADDED
Binary file (23.5 kB)

examples/sample2_jp.mp3 ADDED
Binary file (30.4 kB)

examples/sample3_gr.mp3 ADDED
Binary file (19.8 kB)

examples/sample4_sp.mp3 ADDED
Binary file (22.2 kB)
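These clips back the new five-column examples rows; each row supplies one value per Interface input, in order: Tumor File, Slice Number, Channel, Language, Audio. A small sketch of what detector() receives for the audio argument, assuming Gradio 3.x behavior: gr.Audio(source="microphone") with the default type="numpy" passes a (sample_rate, int16 ndarray) tuple, and clicking an example row loads the listed mp3 and converts it to the same tuple. The loader below is illustrative, not part of the commit.

import librosa
import numpy as np

def example_audio_tuple(path="examples/sample1_en.mp3"):
    # Load the clip as mono float32 in [-1, 1], then cast to the int16
    # layout a microphone recording would have.
    audio, sr = librosa.load(path, sr=None)
    pcm16 = (audio * 32767).astype(np.int16)
    return sr, pcm16            # same shape as the audio_question argument

sampling_rate, waveform = example_audio_tuple()
# Feed it through the transcription sketch above:
# print(transcribe_question(sampling_rate, waveform, language="english"))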
requirements.txt CHANGED
@@ -4,4 +4,5 @@ torchvision
 torchaudio
 nibabel
 monai
-matplotlib
+matplotlib
+librosa