Bomme committed · Commit eae574d · 1 Parent(s): 6183d1a
replace Config class with DummyConfig and removing inference.yml
There were problems with google/cloud/storage/client dependencies.
- app.py +17 -21
- configs/inference.yml +0 -61
app.py CHANGED

@@ -7,14 +7,26 @@ from typing import Literal
 import gradio as gr
 import torch
 
-from NatureLM.config import Config
 from NatureLM.models.NatureLM import NatureLM
 from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
 
-CONFIG
+CONFIG = None
 MODEL: NatureLM = None
 
 
+class DummyConfig:
+    def __init__(self):
+        self.generate = {
+            "max_new_tokens": 300,
+            "num_beams": 2,
+            "do_sample": False,
+            "min_length": 1,
+            "temperature": 0.1,
+            "repetition_penalty": 1.0,
+            "length_penalty": 1.0,
+        }
+
+
 def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
     cuda_enabled = torch.cuda.is_available()
     samples = prepare_sample_waveforms(audios, cuda_enabled)
@@ -262,13 +274,8 @@ def _long_recording_tab():
     )
 
 
-def main(
-    assets_dir: str | Path,
-    cfg_path: str | Path,
-    options: list[str] = [],
-    device: str = "cuda",
-):
-    cfg = Config.from_sources(yaml_file=cfg_path, cli_args=options)
+def main(assets_dir: Path, device: str = "cuda"):
+    cfg = DummyConfig()
     model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
     model.to(device)
     model.eval()
@@ -325,18 +332,7 @@ if __name__ == "__main__":
         default=Path(__file__).parent / "assets",
         help="Directory containing the assets (favicon, examples, etc.)",
     )
-    parser.add_argument(
-        "--cfg-path",
-        type=str,
-        default=Path(__file__).parent / "configs/inference.yml",
-        help="Path to the config file",
-    )
-    parser.add_argument(
-        "--options",
-        nargs="*",
-        default=[],
-        help="Additional options to pass to the config file",
-    )
+
     args = parser.parse_args()
 
     main(args.assets_dir, args.cfg_path, args.options)
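For orientation (not part of the commit): the keys that DummyConfig hard-codes are standard transformers-style generation arguments, so wherever app.py actually runs inference it can forward cfg.generate straight into a generate() call. The helper below is a hedged sketch; run_generation, model, and inputs are hypothetical names, since the real call site is outside the hunks shown above.

import torch


def run_generation(model, cfg, inputs: dict):
    # Hypothetical helper (assumed interface, not taken from app.py):
    # cfg is a DummyConfig, so cfg.generate is a plain dict of Hugging Face-style
    # generation kwargs (max_new_tokens, num_beams, do_sample, ...), which can be
    # forwarded unchanged to a GenerationMixin-style generate() call.
    with torch.no_grad():
        return model.generate(**inputs, **cfg.generate)

Because those keys map directly onto ordinary generate() keyword arguments, a bare object holding a dict is presumably all the app needs once the Config class is gone.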
configs/inference.yml DELETED

@@ -1,61 +0,0 @@
-model:
-  llama_path: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-
-  freeze_beats: True
-
-  use_audio_Qformer: True
-  max_pooling: False
-  downsample_factor: 8
-  freeze_audio_QFormer: False
-  window_level_Qformer: True
-  num_audio_query_token: 1
-  second_per_window: 0.333333
-  second_stride: 0.333333
-
-  audio_llama_proj_model: ""
-  freeze_audio_llama_proj: False
-
-  lora: True
-  lora_rank: 32
-  lora_alpha: 32
-  lora_dropout: 0.1
-
-  prompt_template: "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-  max_txt_len: 160
-  end_sym: <|end_of_text|>
-
-  beats_cfg:
-    input_patch_size: 16
-    embed_dim: 512
-    conv_bias: False
-    encoder_layers: 12
-    encoder_embed_dim: 768
-    encoder_ffn_embed_dim: 3072
-    encoder_attention_heads: 12
-    activation_fn: "gelu"
-    layer_wise_gradient_decay_ratio: 0.6
-    layer_norm_first: False
-    deep_norm: True
-    dropout: 0.0
-    attention_dropout: 0.0
-    activation_dropout: 0.0
-    encoder_layerdrop: 0.05
-    dropout_input: 0.0
-    conv_pos: 128
-    conv_pos_groups: 16
-    relative_position_embedding: True
-    num_buckets: 320
-    max_distance: 800
-    gru_rel_pos: True
-    finetuned_model: True
-    predictor_dropout: 0.0
-    predictor_class: 527
-
-generate:
-  max_new_tokens: 300
-  num_beams: 2
-  do_sample: False
-  min_length: 1
-  temperature: 0.1
-  repetition_penalty: 1.0
-  length_penalty: 1.0