Spaces:

ccmusic-database
/

CNPM

Running

CNPM / app.py

admin

fix cite

cdfed21 3 months ago

4 kB

	import os
	import torch
	import shutil
	import librosa
	import warnings
	import numpy as np
	import gradio as gr
	import librosa.display
	import matplotlib.pyplot as plt
	from utils import get_modelist, find_audio_files, embed_img
	from model import EvalNet


	CLASSES = ["Gong", "Shang", "Jue", "Zhi", "Yu"]
	TEMP_DIR = "./__pycache__/tmp"
	SAMPLE_RATE = 44100


	def zero_padding(y: np.ndarray, end: int):
	    """Force a 1-D signal to exactly ``end`` samples.

	    A signal shorter than ``end`` is right-padded with zeros; a longer one
	    keeps only its final ``end`` samples (the beginning is discarded).

	    Args:
	        y: Input samples; the current length is taken with ``len(y)``.
	        end: Target number of samples, expected to be non-negative.

	    Returns:
	        ``y`` itself when it already has ``end`` samples, otherwise a new
	        zero-padded array or the tail slice of ``y``.
	    """
	    size = len(y)
	    if size < end:
	        return np.concatenate((y, np.zeros(end - size)))

	    if size > end:
	        # Slice from an explicit start index. The shorthand ``y[-end:]``
	        # returns the whole array when ``end`` is 0, because ``-0 == 0``.
	        return y[size - end :]

	    return y


	def audio2mel(audio_path: str, seg_len=20):
	    """Render the log-mel spectrogram of an audio file as an image.

	    The file is loaded at ``SAMPLE_RATE``, forced to ``seg_len * sr`` samples
	    by ``zero_padding``, converted to a mel spectrogram in dB (relative to
	    its maximum) and saved, without axes or padding, to
	    ``{TEMP_DIR}/output.jpg``.

	    Args:
	        audio_path: Path of the audio file to load.
	        seg_len: Multiplier of the sample rate giving the target length,
	            i.e. the segment duration in seconds.
	    """
	    signal, rate = librosa.load(audio_path, sr=SAMPLE_RATE)
	    signal = zero_padding(signal, seg_len * rate)

	    mel_power = librosa.feature.melspectrogram(y=signal, sr=rate)
	    mel_db = librosa.power_to_db(mel_power, ref=np.max)

	    # Draw on the current pyplot figure, hide the axes and export it.
	    librosa.display.specshow(mel_db)
	    plt.axis("off")
	    image_path = f"{TEMP_DIR}/output.jpg"
	    plt.savefig(image_path, bbox_inches="tight", pad_inches=0.0)
	    plt.close()


	def audio2cqt(audio_path: str, seg_len=20):
	    """Render the log-power constant-Q spectrogram of an audio file as an image.

	    The file is loaded at ``SAMPLE_RATE``, forced to ``seg_len * sr`` samples
	    by ``zero_padding``, transformed with ``librosa.cqt``, converted from
	    squared magnitude to dB (relative to its maximum) and saved, without
	    axes or padding, to ``{TEMP_DIR}/output.jpg``.

	    Args:
	        audio_path: Path of the audio file to load.
	        seg_len: Multiplier of the sample rate giving the target length,
	            i.e. the segment duration in seconds.
	    """
	    signal, rate = librosa.load(audio_path, sr=SAMPLE_RATE)
	    signal = zero_padding(signal, seg_len * rate)

	    cqt_coeffs = librosa.cqt(y=signal, sr=rate)
	    # The transform is complex-valued; power is the squared magnitude.
	    cqt_power = np.abs(cqt_coeffs) ** 2
	    cqt_db = librosa.power_to_db(cqt_power, ref=np.max)

	    # Draw on the current pyplot figure, hide the axes and export it.
	    librosa.display.specshow(cqt_db)
	    plt.axis("off")
	    image_path = f"{TEMP_DIR}/output.jpg"
	    plt.savefig(image_path, bbox_inches="tight", pad_inches=0.0)
	    plt.close()


	def audio2chroma(audio_path: str, seg_len=20):
	    """Render the log-scaled chromagram of an audio file as an image.

	    The file is loaded at ``SAMPLE_RATE``, forced to ``seg_len * sr`` samples
	    by ``zero_padding``, turned into an STFT chromagram, squared, converted
	    to dB (relative to its maximum) and saved, without axes or padding, to
	    ``{TEMP_DIR}/output.jpg``.

	    Args:
	        audio_path: Path of the audio file to load.
	        seg_len: Multiplier of the sample rate giving the target length,
	            i.e. the segment duration in seconds.
	    """
	    signal, rate = librosa.load(audio_path, sr=SAMPLE_RATE)
	    signal = zero_padding(signal, seg_len * rate)

	    chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
	    # The chroma values are squared before the dB conversion.
	    chroma_power = np.abs(chroma) ** 2
	    chroma_db = librosa.power_to_db(chroma_power, ref=np.max)

	    # Draw on the current pyplot figure, hide the axes and export it.
	    librosa.display.specshow(chroma_db)
	    plt.axis("off")
	    image_path = f"{TEMP_DIR}/output.jpg"
	    plt.savefig(image_path, bbox_inches="tight", pad_inches=0.0)
	    plt.close()


	def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
	    """Predict the Chinese pentatonic mode of a recording.

	    Args:
	        wav_path: Path of the audio file; a falsy value yields a prompt
	            message instead of a prediction.
	        log_name: Model identifier. Its third-from-last ``_``-separated
	            token selects the spectrogram type (``mel``, ``cqt`` or
	            ``chroma``).
	        folder_path: Scratch directory, deleted and recreated on each call.
	            NOTE: the ``audio2*`` helpers always write to
	            ``{TEMP_DIR}/output.jpg``, so a value other than ``TEMP_DIR``
	            will not find the rendered image.

	    Returns:
	        ``(audio filename, predicted class name)`` on success, or
	        ``(None, message)`` when the input is missing, the model name is
	        not understood, or model loading / spectrogram rendering fails.
	    """
	    # Remove any image left behind by a previous call.
	    if os.path.exists(folder_path):
	        shutil.rmtree(folder_path)

	    if not wav_path:
	        return None, "Please input an audio!"

	    if not log_name:
	        return None, "Please select a model!"

	    name_parts = log_name.split("_")
	    if len(name_parts) < 3:
	        return None, f"Unrecognized model name: {log_name}"

	    # Explicit dispatch instead of eval(): the spectrogram tag comes from a
	    # request parameter and must never be evaluated as Python source.
	    converters = {
	        "mel": audio2mel,
	        "cqt": audio2cqt,
	        "chroma": audio2chroma,
	    }
	    spec = name_parts[-3]
	    to_image = converters.get(spec)
	    if to_image is None:
	        return None, f"Unsupported spectrogram type: {spec}"

	    os.makedirs(folder_path, exist_ok=True)
	    try:
	        model = EvalNet(log_name, len(CLASSES)).model
	        to_image(wav_path)

	    except Exception as e:
	        # UI boundary: report the failure in the output box rather than
	        # crashing the request.
	        return None, f"{e}"

	    image = embed_img(f"{folder_path}/output.jpg")
	    # Inference only, so no autograd graph is needed.
	    with torch.no_grad():
	        output: torch.Tensor = model(image)

	    # Index of the highest score along dim 1, as a plain int for list indexing.
	    pred_id = int(torch.max(output, 1)[1].item())
	    return (
	        os.path.basename(wav_path),
	        CLASSES[pred_id].capitalize(),
	    )


	if __name__ == "__main__":
	    # Script entry point: build the Gradio demo and start serving it.
	    warnings.filterwarnings("ignore")
	    models = get_modelist(assign_model="vit_l_16_cqt")
	    # Each example row pairs a bundled audio file with the first listed model.
	    examples = [[clip, models[0]] for clip in find_audio_files()]

	    with gr.Blocks() as demo:
	        audio_input = gr.Audio(label="Upload a recording", type="filepath")
	        model_picker = gr.Dropdown(
	            choices=models,
	            label="Select a model",
	            value=models[0],
	        )
	        filename_box = gr.Textbox(label="Audio filename", show_copy_button=True)
	        result_box = gr.Textbox(
	            label="Chinese pentatonic mode recognition",
	            show_copy_button=True,
	        )
	        gr.Interface(
	            fn=infer,
	            inputs=[audio_input, model_picker],
	            outputs=[filename_box, result_box],
	            examples=examples,
	            cache_examples=False,
	            flagging_mode="never",
	            title="It is recommended to keep the recording length around 20s.",
	        )

	        # Citation shown below the interface.
	        citation = """
	# Cite
	```bibtex
	@article{Zhou-2025,
	title = {CCMusic: an Open and Diverse Database for Chinese Music Information Retrieval Research},
	author = {Monan Zhou and Shenyang Xu and Zhaorui Liu and Zhaowen Wang and Feng Yu and Wei Li and Baoqiang Han},
	journal = {Transactions of the International Society for Music Information Retrieval},
	year = {2025}
	}
	```"""
	        gr.Markdown(citation)

	    demo.launch()