123
#4
by
demonissyx
- opened
README.md
CHANGED
@@ -3,14 +3,13 @@ language:
|
|
3 |
- en
|
4 |
- zh
|
5 |
pipeline_tag: text-to-audio
|
6 |
-
library_name:
|
7 |
---
|
8 |
|
9 |
# SongGeneration
|
10 |
|
11 |
-
<p align="center"><img src="img/logo.jpg" width="40%"></p>
|
12 |
<p align="center">
|
13 |
-
<a href="https://levo-demo.github.io/">Demo</a> | <a href="https://arxiv.org/abs/2506.07520">Paper</a> | <a href="https://github.com/tencent-ailab/songgeneration">Code</a> | <a href="https://huggingface.co/spaces/
|
14 |
</p>
|
15 |
|
16 |
|
@@ -20,7 +19,7 @@ This repository is the official weight repository for LeVo: High-Quality Song Ge
|
|
20 |
|
21 |
| Model | HuggingFace |
|
22 |
| :----------------------: | :----------------------------------------------------------: |
|
23 |
-
| SongGeneration-base | <a href="https://huggingface.co/tencent/SongGeneration/tree/main/ckpt/
|
24 |
| SongGeneration-base(zh&en) | Coming soon |
|
25 |
| SongGeneration-full(zh&en) | Coming soon |
|
26 |
|
@@ -32,4 +31,4 @@ We develop the SongGeneration model. It is an LM-based framework consisting of *
|
|
32 |
|
33 |
## License
|
34 |
|
35 |
-
The code and weights in this repository are released under the terms of the [LICENSE](LICENSE) file.
|
|
|
3 |
- en
|
4 |
- zh
|
5 |
pipeline_tag: text-to-audio
|
6 |
+
library_name: transformers
|
7 |
---
|
8 |
|
9 |
# SongGeneration
|
10 |
|
|
|
11 |
<p align="center">
|
12 |
+
<a href="https://levo-demo.github.io/">Demo</a> | <a href="https://arxiv.org/abs/2506.07520">Paper</a> | <a href="https://github.com/tencent-ailab/songgeneration">Code</a> | <a href="https://huggingface.co/spaces/waytan22/SongGeneration-LeVo">Space Demo</a>
|
13 |
</p>
|
14 |
|
15 |
|
|
|
19 |
|
20 |
| Model | HuggingFace |
|
21 |
| :----------------------: | :----------------------------------------------------------: |
|
22 |
+
| SongGeneration-base(zh) | <a href="https://huggingface.co/tencent/SongGeneration/tree/main/ckpt/songgeneration_base_zh">v20250520</a> |
|
23 |
| SongGeneration-base(zh&en) | Coming soon |
|
24 |
| SongGeneration-full(zh&en) | Coming soon |
|
25 |
|
|
|
31 |
|
32 |
## License
|
33 |
|
34 |
+
The code and weights in this repository are released under the terms of the [LICENSE](LICENSE) file.
|
ckpt/model_septoken/model_2.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4808167708
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:758aa342942a7b7c0ae179af1a952e0b944e39128ea816741499b3031113aaee
|
3 |
size 4808167708
|
ckpt/{songgeneration_base → songgeneration_base_zh}/config.yaml
RENAMED
@@ -106,36 +106,3 @@ conditioners:
|
|
106 |
QwTextTokenizer:
|
107 |
token_path: third_party/Qwen2-7B
|
108 |
max_len: 50
|
109 |
-
|
110 |
-
offload:
|
111 |
-
audiolm:
|
112 |
-
offload_module: self
|
113 |
-
cpu_mem_gb: 0
|
114 |
-
pre_copy_step: 1
|
115 |
-
clean_cache_after_forward: false
|
116 |
-
dtype: torch.float16
|
117 |
-
offload_layer_dict:
|
118 |
-
transformer: 4
|
119 |
-
transformer2: 4
|
120 |
-
ignore_layer_list: []
|
121 |
-
clean_cache_wrapper:
|
122 |
-
module: self
|
123 |
-
method_name: _sample_next_token
|
124 |
-
diff_mem_gb_thre: 2
|
125 |
-
debug: false
|
126 |
-
|
127 |
-
wav_tokenizer_diffusion:
|
128 |
-
offload_module: self.model.model
|
129 |
-
pre_copy_step: 1
|
130 |
-
clean_cache_after_forward: false
|
131 |
-
cpu_mem_gb: -1
|
132 |
-
dtype: null
|
133 |
-
offload_layer_dict:
|
134 |
-
cfm_wrapper: 5
|
135 |
-
hubert: 4
|
136 |
-
ignore_layer_list: []
|
137 |
-
clean_cache_wrapper:
|
138 |
-
module: self.model.model.cfm_wrapper.estimator
|
139 |
-
method_name: forward
|
140 |
-
diff_mem_gb_thre: 1
|
141 |
-
debug: false
|
|
|
106 |
QwTextTokenizer:
|
107 |
token_path: third_party/Qwen2-7B
|
108 |
max_len: 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ckpt/{songgeneration_base → songgeneration_base_zh}/model.pt
RENAMED
File without changes
|
img/logo.jpg
DELETED
Binary file (70.4 kB)
|
|