Bomme committed · Commit eae574d · 1 Parent(s): 6183d1a
replace Config class with DummyConfig and removing inference.yml
There were problems with google/cloud/storage/client dependencies.
- app.py +17 -21
- configs/inference.yml +0 -61
app.py CHANGED

@@ -7,14 +7,26 @@ from typing import Literal
 import gradio as gr
 import torch
 
-from NatureLM.config import Config
 from NatureLM.models.NatureLM import NatureLM
 from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
 
-CONFIG
+CONFIG = None
 MODEL: NatureLM = None
 
 
+class DummyConfig:
+    def __init__(self):
+        self.generate = {
+            "max_new_tokens": 300,
+            "num_beams": 2,
+            "do_sample": False,
+            "min_length": 1,
+            "temperature": 0.1,
+            "repetition_penalty": 1.0,
+            "length_penalty": 1.0,
+        }
+
+
 def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
     cuda_enabled = torch.cuda.is_available()
     samples = prepare_sample_waveforms(audios, cuda_enabled)
@@ -262,13 +274,8 @@ def _long_recording_tab():
     )
 
 
-def main(
-    assets_dir: str | Path,
-    cfg_path: str | Path,
-    options: list[str] = [],
-    device: str = "cuda",
-):
-    cfg = Config.from_sources(yaml_file=cfg_path, cli_args=options)
+def main(assets_dir: Path, device: str = "cuda"):
+    cfg = DummyConfig()
     model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
     model.to(device)
     model.eval()
@@ -325,18 +332,7 @@ if __name__ == "__main__":
         default=Path(__file__).parent / "assets",
         help="Directory containing the assets (favicon, examples, etc.)",
     )
-    parser.add_argument(
-        "--cfg-path",
-        type=str,
-        default=Path(__file__).parent / "configs/inference.yml",
-        help="Path to the config file",
-    )
-    parser.add_argument(
-        "--options",
-        nargs="*",
-        default=[],
-        help="Additional options to pass to the config file",
-    )
+
     args = parser.parse_args()
 
     main(args.assets_dir, args.cfg_path, args.options)
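For orientation (not part of the commit): the keys that DummyConfig hard-codes are standard transformers-style generation arguments, so wherever app.py actually runs inference it can forward cfg.generate straight into a generate() call. The helper below is a hedged sketch; run_generation, model, and inputs are hypothetical names, since the real call site is outside the hunks shown above.

import torch


def run_generation(model, cfg, inputs: dict):
    # Hypothetical helper (assumed interface, not taken from app.py):
    # cfg is a DummyConfig, so cfg.generate is a plain dict of Hugging Face-style
    # generation kwargs (max_new_tokens, num_beams, do_sample, ...), which can be
    # forwarded unchanged to a GenerationMixin-style generate() call.
    with torch.no_grad():
        return model.generate(**inputs, **cfg.generate)

Because those keys map directly onto ordinary generate() keyword arguments, a bare object holding a dict is presumably all the app needs once the Config class is gone.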
configs/inference.yml DELETED

@@ -1,61 +0,0 @@
-model:
-  llama_path: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-
-  freeze_beats: True
-
-  use_audio_Qformer: True
-  max_pooling: False
-  downsample_factor: 8
-  freeze_audio_QFormer: False
-  window_level_Qformer: True
-  num_audio_query_token: 1
-  second_per_window: 0.333333
-  second_stride: 0.333333
-
-  audio_llama_proj_model: ""
-  freeze_audio_llama_proj: False
-
-  lora: True
-  lora_rank: 32
-  lora_alpha: 32
-  lora_dropout: 0.1
-
-  prompt_template: "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-  max_txt_len: 160
-  end_sym: <|end_of_text|>
-
-  beats_cfg:
-    input_patch_size: 16
-    embed_dim: 512
-    conv_bias: False
-    encoder_layers: 12
-    encoder_embed_dim: 768
-    encoder_ffn_embed_dim: 3072
-    encoder_attention_heads: 12
-    activation_fn: "gelu"
-    layer_wise_gradient_decay_ratio: 0.6
-    layer_norm_first: False
-    deep_norm: True
-    dropout: 0.0
-    attention_dropout: 0.0
-    activation_dropout: 0.0
-    encoder_layerdrop: 0.05
-    dropout_input: 0.0
-    conv_pos: 128
-    conv_pos_groups: 16
-    relative_position_embedding: True
-    num_buckets: 320
-    max_distance: 800
-    gru_rel_pos: True
-    finetuned_model: True
-    predictor_dropout: 0.0
-    predictor_class: 527
-
-generate:
-  max_new_tokens: 300
-  num_beams: 2
-  do_sample: False
-  min_length: 1
-  temperature: 0.1
-  repetition_penalty: 1.0
-  length_penalty: 1.0