aliabd (HF Staff) committed
Commit 97a10e0 · verified · 1 Parent(s): 88a747a

Upload folder using huggingface_hub

Files changed (4):
  1. data_setups.py +5 -5
  2. requirements.txt +2 -2
  3. run.ipynb +1 -1
  4. run.py +1 -2
data_setups.py CHANGED
@@ -25,7 +25,7 @@ def find_classes(directory: str):
     # 3. Crearte a dictionary of index labels (computers prefer numerical rather than string labels)
     class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
     return classes, class_to_idx
-
+
 def resample(wav, sample_rate, new_sample_rate):
     if wav.shape[0] >= 2:
         wav = torch.mean(wav, dim=0)
@@ -61,9 +61,9 @@ def normalize(image, mean=None, std=None):
 def compute_melspec(wav, sample_rate=SAMPLE_RATE):
     melspec = librosa.feature.melspectrogram(
         y=wav,
-        sr=sample_rate,
-        n_fft=N_FFT,
-        fmin=F_MIN,
+        sr=sample_rate,
+        n_fft=N_FFT,
+        fmin=F_MIN,
         fmax=F_MAX,
         n_mels=N_MELS,
         hop_length=HOP_LEN
@@ -77,4 +77,4 @@ def audio_preprocess(wav, sample_rate):
     image = mono_to_color(melspec)
     image = normalize(image, mean=None, std=None)
     image = torch.from_numpy(image)
-    return image
+    return image
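The second hunk only reflows keyword arguments of compute_melspec, so for context here is a minimal sketch of that function in isolation. Only SAMPLE_RATE is known from run.py; the other constant values below are illustrative assumptions, the real definitions live at the top of data_setups.py, and wav is assumed to already be a mono 1-D NumPy array.

import librosa

SAMPLE_RATE = 44100        # matches run.py
N_FFT = 2048               # assumed value for illustration
HOP_LEN = 512              # assumed value for illustration
N_MELS = 128               # assumed value for illustration
F_MIN = 20                 # assumed value for illustration
F_MAX = SAMPLE_RATE // 2   # assumed value for illustration

def compute_melspec(wav, sample_rate=SAMPLE_RATE):
    # Mel spectrogram of a mono waveform; sr, n_fft and fmin are the
    # keyword arguments touched by this commit.
    return librosa.feature.melspectrogram(
        y=wav,
        sr=sample_rate,
        n_fft=N_FFT,
        fmin=F_MIN,
        fmax=F_MAX,
        n_mels=N_MELS,
        hop_length=HOP_LEN,
    )

audio_preprocess (third hunk) then turns this spectrogram into a normalized image tensor via mono_to_color, normalize, and torch.from_numpy before it is fed to the model.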
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-gradio-client @ git+https://github.com/gradio-app/gradio@de997e67c9a7feb9e2eccebf92969366dbd67eba#subdirectory=client/python
-https://gradio-builds.s3.amazonaws.com/de997e67c9a7feb9e2eccebf92969366dbd67eba/gradio-4.39.0-py3-none-any.whl
+gradio-client @ git+https://github.com/gradio-app/gradio@9b42ba8f1006c05d60a62450d3036ce0d6784f86#subdirectory=client/python
+https://gradio-builds.s3.amazonaws.com/9b42ba8f1006c05d60a62450d3036ce0d6784f86/gradio-4.39.0-py3-none-any.whl
 torch==1.12.0
 torchvision==0.13.0
 torchaudio==0.12.0
run.ipynb CHANGED
@@ -1 +1 @@
- {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: musical_instrument_identification\n", "### This demo identifies musical instruments from an audio file. It uses Gradio's Audio and Label components.\n", " "]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 librosa==0.9.2 gdown"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/musical_instrument_identification/data_setups.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import torch\n", "import torchaudio\n", "from timeit import default_timer as timer\n", "from data_setups import audio_preprocess, resample\n", "import gdown\n", "\n", "url = 'https://drive.google.com/uc?id=1X5CR18u0I-ZOi_8P0cNptCe5JGk9Ro0C'\n", "output = 'piano.wav'\n", "gdown.download(url, output, quiet=False)\n", "url = 'https://drive.google.com/uc?id=1W-8HwmGR5SiyDbUcGAZYYDKdCIst07__'\n", "output= 'torch_efficientnet_fold2_CNN.pth'\n", "gdown.download(url, output, quiet=False)\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "SAMPLE_RATE = 44100\n", "AUDIO_LEN = 2.90\n", "model = torch.load(\"torch_efficientnet_fold2_CNN.pth\", map_location=torch.device('cpu'))\n", "LABELS = [\n", " \"Cello\", \"Clarinet\", \"Flute\", \"Acoustic Guitar\", \"Electric Guitar\", \"Organ\", \"Piano\", \"Saxophone\", \"Trumpet\", \"Violin\", \"Voice\"\n", "]\n", "example_list = [\n", " [\"piano.wav\"]\n", "]\n", "\n", "\n", "def predict(audio_path):\n", " start_time = timer()\n", " wavform, sample_rate = torchaudio.load(audio_path)\n", " wav = resample(wavform, sample_rate, SAMPLE_RATE)\n", " if len(wav) > int(AUDIO_LEN * SAMPLE_RATE):\n", " wav = wav[:int(AUDIO_LEN * SAMPLE_RATE)]\n", " else:\n", " print(f\"input length {len(wav)} too small!, need over {int(AUDIO_LEN * SAMPLE_RATE)}\")\n", " return\n", " img = audio_preprocess(wav, SAMPLE_RATE).unsqueeze(0)\n", " model.eval()\n", " with torch.inference_mode():\n", " pred_probs = torch.softmax(model(img), dim=1)\n", " pred_labels_and_probs = {LABELS[i]: float(pred_probs[0][i]) for i in range(len(LABELS))}\n", " pred_time = round(timer() - start_time, 5)\n", " return pred_labels_and_probs, pred_time\n", "\n", "demo = gr.Interface(fn=predict,\n", " inputs=gr.Audio(type=\"filepath\"),\n", " outputs=[gr.Label(num_top_classes=11, label=\"Predictions\"), \n", " gr.Number(label=\"Prediction time (s)\")],\n", " examples=example_list,\n", " cache_examples=False\n", " )\n", "\n", "demo.launch(debug=False)\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
 
+ {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: musical_instrument_identification\n", "### This demo identifies musical instruments from an audio file. It uses Gradio's Audio and Label components.\n", " "]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 librosa==0.9.2 gdown"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/musical_instrument_identification/data_setups.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import torch\n", "import torchaudio\n", "from timeit import default_timer as timer\n", "from data_setups import audio_preprocess, resample\n", "import gdown\n", "\n", "url = 'https://drive.google.com/uc?id=1X5CR18u0I-ZOi_8P0cNptCe5JGk9Ro0C'\n", "output = 'piano.wav'\n", "gdown.download(url, output, quiet=False)\n", "url = 'https://drive.google.com/uc?id=1W-8HwmGR5SiyDbUcGAZYYDKdCIst07__'\n", "output= 'torch_efficientnet_fold2_CNN.pth'\n", "gdown.download(url, output, quiet=False)\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "SAMPLE_RATE = 44100\n", "AUDIO_LEN = 2.90\n", "model = torch.load(\"torch_efficientnet_fold2_CNN.pth\", map_location=torch.device('cpu'))\n", "LABELS = [\n", " \"Cello\", \"Clarinet\", \"Flute\", \"Acoustic Guitar\", \"Electric Guitar\", \"Organ\", \"Piano\", \"Saxophone\", \"Trumpet\", \"Violin\", \"Voice\"\n", "]\n", "example_list = [\n", " [\"piano.wav\"]\n", "]\n", "\n", "def predict(audio_path):\n", " start_time = timer()\n", " wavform, sample_rate = torchaudio.load(audio_path)\n", " wav = resample(wavform, sample_rate, SAMPLE_RATE)\n", " if len(wav) > int(AUDIO_LEN * SAMPLE_RATE):\n", " wav = wav[:int(AUDIO_LEN * SAMPLE_RATE)]\n", " else:\n", " print(f\"input length {len(wav)} too small!, need over {int(AUDIO_LEN * SAMPLE_RATE)}\")\n", " return\n", " img = audio_preprocess(wav, SAMPLE_RATE).unsqueeze(0)\n", " model.eval()\n", " with torch.inference_mode():\n", " pred_probs = torch.softmax(model(img), dim=1)\n", " pred_labels_and_probs = {LABELS[i]: float(pred_probs[0][i]) for i in range(len(LABELS))}\n", " pred_time = round(timer() - start_time, 5)\n", " return pred_labels_and_probs, pred_time\n", "\n", "demo = gr.Interface(fn=predict,\n", " inputs=gr.Audio(type=\"filepath\"),\n", " outputs=[gr.Label(num_top_classes=11, label=\"Predictions\"),\n", " gr.Number(label=\"Prediction time (s)\")],\n", " examples=example_list,\n", " cache_examples=False\n", " )\n", "\n", "demo.launch(debug=False)\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
run.py CHANGED
@@ -22,7 +22,6 @@ example_list = [
     ["piano.wav"]
 ]
 
-
 def predict(audio_path):
     start_time = timer()
     wavform, sample_rate = torchaudio.load(audio_path)
@@ -42,7 +41,7 @@ def predict(audio_path):
 
 demo = gr.Interface(fn=predict,
                     inputs=gr.Audio(type="filepath"),
-                    outputs=[gr.Label(num_top_classes=11, label="Predictions"), 
+                    outputs=[gr.Label(num_top_classes=11, label="Predictions"),
                              gr.Number(label="Prediction time (s)")],
                     examples=example_list,
                     cache_examples=False