Upload folder using huggingface_hub
Browse files
- config_diff.json +8 -8
- conversion_metadata.json +10 -0
- model-00001-of-00031.safetensors +1 -1
- model-00002-of-00031.safetensors +1 -1
- model-00003-of-00031.safetensors +1 -1
- model-00004-of-00031.safetensors +1 -1
- model-00005-of-00031.safetensors +1 -1
- model-00006-of-00031.safetensors +1 -1
- model-00007-of-00031.safetensors +1 -1
- model-00008-of-00031.safetensors +1 -1
- model-00009-of-00031.safetensors +1 -1
- model-00010-of-00031.safetensors +1 -1
- model-00011-of-00031.safetensors +1 -1
- model-00012-of-00031.safetensors +1 -1
- model-00013-of-00031.safetensors +1 -1
- model-00014-of-00031.safetensors +1 -1
- model-00015-of-00031.safetensors +1 -1
- model-00016-of-00031.safetensors +1 -1
- model-00017-of-00031.safetensors +1 -1
- model-00018-of-00031.safetensors +1 -1
- model-00019-of-00031.safetensors +1 -1
- model-00020-of-00031.safetensors +1 -1
- model-00021-of-00031.safetensors +1 -1
- model-00022-of-00031.safetensors +1 -1
- model-00023-of-00031.safetensors +1 -1
- model-00024-of-00031.safetensors +1 -1
- model-00025-of-00031.safetensors +1 -1
- model-00026-of-00031.safetensors +1 -1
- model-00027-of-00031.safetensors +1 -1
- model-00028-of-00031.safetensors +1 -1
- model-00029-of-00031.safetensors +1 -1
- model-00030-of-00031.safetensors +1 -1
- qwen2to3.py +22 -19
config_diff.json
CHANGED
@@ -1,9 +1,5 @@
|
|
1 |
{
|
2 |
"changed": {
|
3 |
-
"model_type": {
|
4 |
-
"from": "qwen2",
|
5 |
-
"to": "qwen3"
|
6 |
-
},
|
7 |
"vocab_size": {
|
8 |
"from": 152064,
|
9 |
"to": 151936
|
@@ -12,10 +8,6 @@
|
|
12 |
"from": "Qwen/Qwen2.5-72B-Instruct",
|
13 |
"to": ""
|
14 |
},
|
15 |
-
"bos_token_id": {
|
16 |
-
"from": 151643,
|
17 |
-
"to": null
|
18 |
-
},
|
19 |
"tie_word_embeddings": {
|
20 |
"from": false,
|
21 |
"to": true
|
@@ -24,6 +16,10 @@
|
|
24 |
"from": 151645,
|
25 |
"to": null
|
26 |
},
|
|
|
|
|
|
|
|
|
27 |
"architectures": {
|
28 |
"from": [
|
29 |
"Qwen2ForCausalLM"
|
@@ -31,6 +27,10 @@
|
|
31 |
"to": [
|
32 |
"Qwen3ForCausalLM"
|
33 |
]
|
|
|
|
|
|
|
|
|
34 |
}
|
35 |
},
|
36 |
"added": {
|
|
|
1 |
{
|
2 |
"changed": {
|
|
|
|
|
|
|
|
|
3 |
"vocab_size": {
|
4 |
"from": 152064,
|
5 |
"to": 151936
|
|
|
8 |
"from": "Qwen/Qwen2.5-72B-Instruct",
|
9 |
"to": ""
|
10 |
},
|
|
|
|
|
|
|
|
|
11 |
"tie_word_embeddings": {
|
12 |
"from": false,
|
13 |
"to": true
|
|
|
16 |
"from": 151645,
|
17 |
"to": null
|
18 |
},
|
19 |
+
"model_type": {
|
20 |
+
"from": "qwen2",
|
21 |
+
"to": "qwen3"
|
22 |
+
},
|
23 |
"architectures": {
|
24 |
"from": [
|
25 |
"Qwen2ForCausalLM"
|
|
|
27 |
"to": [
|
28 |
"Qwen3ForCausalLM"
|
29 |
]
|
30 |
+
},
|
31 |
+
"bos_token_id": {
|
32 |
+
"from": 151643,
|
33 |
+
"to": null
|
34 |
}
|
35 |
},
|
36 |
"added": {
|
conversion_metadata.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"conversion_date_utc": "2025-06-10T17:39:14.457747",
|
3 |
+
"source_model": "Qwen/Qwen2.5-72B-Instruct",
|
4 |
+
"donor_model": "Qwen/Qwen3-32B",
|
5 |
+
"warnings": [
|
6 |
+
"This is a community-created model merge. Its behavior may be unpredictable.",
|
7 |
+
"Sliding window config inherited from Qwen2.5 with Qwen3 RoPE theta - long context behavior MUST be validated.",
|
8 |
+
"Post-conversion evaluation is highly recommended for numerical stability, quantization, and safety alignment."
|
9 |
+
]
|
10 |
+
}
|
model-00001-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4546661424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ebe119ba44e78c9050dbab69114c8f4834181422f92407f335be99b3bfed912
|
3 |
size 4546661424
|
model-00002-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061232
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ddddaf2fe9205022e521f60e4cfa6d23c7dbd2e4c3b1f013922eee3b4402a3f
|
3 |
size 4964061232
|
model-00003-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577096
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bc9ec6b8cc2f15b94365c6824047bcac3538e70c814fb3c74f314ed49dc2095
|
3 |
size 4781577096
|
model-00004-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610096
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e0088c1b4df8a82118246d2d5d7847551e3990513b25ecabcea9a944cb1516d
|
3 |
size 4781610096
|
model-00005-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db62306ab03ca84de2789e5c1db2f954ce1a6b13784940305a593d7c0203a598
|
3 |
size 4781610128
|
model-00006-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:720b31a361a12aa6fea6449fb90f312b4156b97c7ac63afcdb39c21584a39867
|
3 |
size 4964061264
|
model-00007-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97545eafa4a41817faea3a9471e73c5be245ea29e10cae7fd11736d749c2e285
|
3 |
size 4781577128
|
model-00008-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a6d96fa787d0fb1eba7e84af0ae6f78301552b4676c79f010ba59c8c9667d30
|
3 |
size 4781610128
|
model-00009-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:502e80229011cefa3fe391e2b8b500656fae9692aa841846759ee258565c7508
|
3 |
size 4781610128
|
model-00010-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:753bca5be6b1f6e0042b5f42a33caff3936ad4d435b89bb9894635f65629c208
|
3 |
size 4964061264
|
model-00011-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f428d399a792b9c179d8845bef256441696ed64a8c0e20880f4d62f4faf2515b
|
3 |
size 4781577128
|
model-00012-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78197a393251db752c94812c92f781c338ead23cce2a971b79a5e18139c95196
|
3 |
size 4781610128
|
model-00013-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86330a1824a777c2105e6fe19c81b11e17e63446726f2c138b34dc1de5999493
|
3 |
size 4781610128
|
model-00014-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3362dc363af862d300f443002d16e29b3a84ac794e1c711f86e2fe25ca973d69
|
3 |
size 4964061264
|
model-00015-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3e4bd9b0f0b87fd3d8c71dcbf357aaf7a656d9d9c851a1e2d93f9542567a957
|
3 |
size 4781577128
|
model-00016-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b446fa6f25cdb304c18d060653a75638b2030dae18e6b705c9365beb260a271
|
3 |
size 4781610128
|
model-00017-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:281a9dcbb30b975a56b26d5d37fbfaf82a6d57fad6be5362c26d8a5f9759572e
|
3 |
size 4781610128
|
model-00018-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c79648f7404a3ec396dc0f3a318b9910ae97b64735a53505a2232be960fef15
|
3 |
size 4964061264
|
model-00019-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4141b7e5b83b531706dc7850ba99ea92ede5e45835b9b82f911c39a88bee4c30
|
3 |
size 4781577128
|
model-00020-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:549c338b690fac9f20dbd51166e9d7ea4637c949f79ea213217668a9bbfa34ab
|
3 |
size 4781610128
|
model-00021-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fe307829e356fc6ecead37cdad243cf3fdd832f92219e1c2d77053b4b975d26
|
3 |
size 4781610128
|
model-00022-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33ba2a5a0633406992b0931fa97848ac04b0e744110b21e75da9422cd4e51ec8
|
3 |
size 4964061264
|
model-00023-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c338036e51aac0f2aa8810ffab9392dbba87e9d5d56b1ac641e2379cb1046b97
|
3 |
size 4781577128
|
model-00024-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d68a1e351fbc3074f58b0bb6014f485a76796487400aae5b6540b5c24daf326a
|
3 |
size 4781610128
|
model-00025-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c50e4f2b171237132b0349359974f26d9c6f3d03361046f5c260d22162ae2ac7
|
3 |
size 4781610128
|
model-00026-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4964061264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc179221ee6a2038c1def8fd8bc521bd5b8a8f42fae45c18d84fcab6075a976e
|
3 |
size 4964061264
|
model-00027-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781577128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcd6b1e73f0311bc965e0c2be822891a59c4ddc9979cb25edfcbb6753045e4b
|
3 |
size 4781577128
|
model-00028-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fcfb6e484c433364cb232a8755a6b37c83b8cd6e9a29f35af8686895cd349ba
|
3 |
size 4781610128
|
model-00029-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4781610128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e947e91e2494dd122a5cac3a3dbe4c5ac4bcb82ddd36e8cc99c3c69e95891e2f
|
3 |
size 4781610128
|
model-00030-of-00031.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3208726960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e4db59843cff2e9ec0b2b70a5ec1a3e2ccb9c9380d24e0e0bbb9592a437b067
|
3 |
size 3208726960
|
qwen2to3.py
CHANGED
@@ -1,16 +1,14 @@
|
|
1 |
-
# file: convert_qwen2.5_to_qwen3_final_decoupled.py
|
2 |
-
|
3 |
import torch
|
4 |
import os
|
5 |
import json
|
6 |
-
import re
|
7 |
from datetime import datetime
|
8 |
from tqdm import tqdm
|
9 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
|
10 |
from transformers import Qwen3Config, Qwen3ForCausalLM
|
11 |
from collections import Counter
|
12 |
|
13 |
-
# --- Helper Functions (
|
14 |
|
15 |
def create_vocab_mapping(s_tok, t_tok):
|
16 |
s_vocab, t_vocab = s_tok.get_vocab(), t_tok.get_vocab()
|
@@ -64,8 +62,8 @@ def validate_model(path):
|
|
64 |
outputs = model.generate(**inputs, max_new_tokens=25, do_sample=False, pad_token_id=tokenizer.eos_token_id)
|
65 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
66 |
print(f"Generated Response: '{response}'")
|
67 |
-
assert len(response) > len(prompt)
|
68 |
-
print("\n ✓ Validation successful: Model loads and generates coherent text.")
|
69 |
except Exception as e:
|
70 |
print(f"\n ✗ Validation FAILED: {e}")
|
71 |
|
@@ -73,7 +71,7 @@ def validate_model(path):
|
|
73 |
def convert_qwen2_to_qwen3_decoupled():
|
74 |
source_model_id, donor_model_id = "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen3-32B"
|
75 |
target_model_path = "./Qwen3-72B-Instruct"
|
76 |
-
print("Starting DECOUPLED conversion process (v5.
|
77 |
|
78 |
# --- 1. Pre-flight Checks ---
|
79 |
print("\n[Step 1/6] Running pre-flight architectural checks...")
|
@@ -88,8 +86,7 @@ def convert_qwen2_to_qwen3_decoupled():
|
|
88 |
print("\n[Step 2/6] Loading models & tokenizers using standard AutoClasses...")
|
89 |
dtype = torch.bfloat16
|
90 |
s_model = AutoModelForCausalLM.from_pretrained(source_model_id, torch_dtype=dtype, device_map="auto")
|
91 |
-
|
92 |
-
d_model = AutoModelForCausalLM.from_pretrained(donor_model_id, torch_dtype=dtype, low_cpu_mem_usage=True)
|
93 |
s_tokenizer = AutoTokenizer.from_pretrained(source_model_id)
|
94 |
t_tokenizer = AutoTokenizer.from_pretrained(donor_model_id)
|
95 |
|
@@ -101,19 +98,25 @@ def convert_qwen2_to_qwen3_decoupled():
|
|
101 |
# --- 4. Convert and Transfer Weights ---
|
102 |
print("\n[Step 4/6] Converting weights (memory-safe)...")
|
103 |
s_state_dict = {k: v.to('cpu', dtype=dtype) for k, v in tqdm(s_model.state_dict().items(), desc="Source state dict to CPU")}
|
|
|
104 |
|
105 |
vocab_mapping = create_vocab_mapping(s_tokenizer, t_tokenizer)
|
106 |
verify_special_tokens(s_tokenizer, t_tokenizer, vocab_mapping)
|
107 |
|
108 |
new_state_dict = {}
|
109 |
-
|
110 |
|
111 |
for key in tqdm(t_model.state_dict().keys(), desc="Transferring weights"):
|
112 |
-
if "q_norm
|
113 |
-
# ---
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
117 |
elif "model.embed_tokens.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
|
118 |
elif "lm_head.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
|
119 |
elif key in s_state_dict: new_state_dict[key] = s_state_dict[key].clone()
|
@@ -128,15 +131,15 @@ def convert_qwen2_to_qwen3_decoupled():
|
|
128 |
t_model.save_pretrained(target_model_path, safe_serialization=True)
|
129 |
t_tokenizer.save_pretrained(target_model_path)
|
130 |
save_config_diff(s_config, t_config, target_model_path)
|
131 |
-
metadata = {"conversion_date_utc": datetime.
|
132 |
-
"warnings": ["This is a community-created model merge. Its behavior may be unpredictable.", "
|
133 |
with open(os.path.join(target_model_path, "conversion_metadata.json"), "w") as f: json.dump(metadata, f, indent=2)
|
134 |
print(f"✅ Model saved to: {target_model_path}")
|
135 |
|
136 |
# --- 6. Final Validation ---
|
137 |
-
del s_model, d_model, s_state_dict,
|
138 |
torch.cuda.empty_cache()
|
139 |
validate_model(target_model_path)
|
140 |
|
141 |
if __name__ == "__main__":
|
142 |
-
convert_qwen2_to_qwen3_decoupled()
|
|
|
|
|
|
|
1 |
import torch
|
2 |
import os
|
3 |
import json
|
4 |
+
import re # <-- Import the regular expression module
|
5 |
from datetime import datetime
|
6 |
from tqdm import tqdm
|
7 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
|
8 |
from transformers import Qwen3Config, Qwen3ForCausalLM
|
9 |
from collections import Counter
|
10 |
|
11 |
+
# --- Helper Functions (Definitive Version) ---
|
12 |
|
13 |
def create_vocab_mapping(s_tok, t_tok):
|
14 |
s_vocab, t_vocab = s_tok.get_vocab(), t_tok.get_vocab()
|
|
|
62 |
outputs = model.generate(**inputs, max_new_tokens=25, do_sample=False, pad_token_id=tokenizer.eos_token_id)
|
63 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
64 |
print(f"Generated Response: '{response}'")
|
65 |
+
assert len(response) > len(prompt), "Model did not generate new tokens."
|
66 |
+
print("\n ✓ Validation successful: Model loads and generates coherent text using standard transformers.")
|
67 |
except Exception as e:
|
68 |
print(f"\n ✗ Validation FAILED: {e}")
|
69 |
|
|
|
71 |
def convert_qwen2_to_qwen3_decoupled():
|
72 |
source_model_id, donor_model_id = "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen3-32B"
|
73 |
target_model_path = "./Qwen3-72B-Instruct"
|
74 |
+
print("Starting DECOUPLED conversion process (v5.3)...")
|
75 |
|
76 |
# --- 1. Pre-flight Checks ---
|
77 |
print("\n[Step 1/6] Running pre-flight architectural checks...")
|
|
|
86 |
print("\n[Step 2/6] Loading models & tokenizers using standard AutoClasses...")
|
87 |
dtype = torch.bfloat16
|
88 |
s_model = AutoModelForCausalLM.from_pretrained(source_model_id, torch_dtype=dtype, device_map="auto")
|
89 |
+
d_model = AutoModelForCausalLM.from_pretrained(donor_model_id, torch_dtype=dtype, device_map="auto")
|
|
|
90 |
s_tokenizer = AutoTokenizer.from_pretrained(source_model_id)
|
91 |
t_tokenizer = AutoTokenizer.from_pretrained(donor_model_id)
|
92 |
|
|
|
98 |
# --- 4. Convert and Transfer Weights ---
|
99 |
print("\n[Step 4/6] Converting weights (memory-safe)...")
|
100 |
s_state_dict = {k: v.to('cpu', dtype=dtype) for k, v in tqdm(s_model.state_dict().items(), desc="Source state dict to CPU")}
|
101 |
+
d_state_dict = {k: v.to('cpu', dtype=dtype) for k, v in tqdm(d_model.state_dict().items(), desc="Donor state dict to CPU")}
|
102 |
|
103 |
vocab_mapping = create_vocab_mapping(s_tokenizer, t_tokenizer)
|
104 |
verify_special_tokens(s_tokenizer, t_tokenizer, vocab_mapping)
|
105 |
|
106 |
new_state_dict = {}
|
107 |
+
num_donor_layers = d_config.num_hidden_layers
|
108 |
|
109 |
for key in tqdm(t_model.state_dict().keys(), desc="Transferring weights"):
|
110 |
+
if "q_norm" in key or "k_norm" in key:
|
111 |
+
# --- FIX: Implement Cyclical Grafting for Norm Layers ---
|
112 |
+
match = re.search(r'layers\.(\d+)\.', key)
|
113 |
+
if match:
|
114 |
+
target_layer_idx = int(match.group(1))
|
115 |
+
donor_layer_idx = target_layer_idx % num_donor_layers
|
116 |
+
donor_key = key.replace(f'layers.{target_layer_idx}.', f'layers.{donor_layer_idx}.')
|
117 |
+
new_state_dict[key] = d_state_dict[donor_key].clone()
|
118 |
+
else:
|
119 |
+
print(f" ⚠️ Could not parse layer index for norm key: {key}. Skipping.")
|
120 |
elif "model.embed_tokens.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
|
121 |
elif "lm_head.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
|
122 |
elif key in s_state_dict: new_state_dict[key] = s_state_dict[key].clone()
|
|
|
131 |
t_model.save_pretrained(target_model_path, safe_serialization=True)
|
132 |
t_tokenizer.save_pretrained(target_model_path)
|
133 |
save_config_diff(s_config, t_config, target_model_path)
|
134 |
+
metadata = {"conversion_date_utc": datetime.utcnow().isoformat(), "source_model": source_model_id, "donor_model": donor_model_id,
|
135 |
+
"warnings": ["This is a community-created model merge. Its behavior may be unpredictable.", "Sliding window config inherited from Qwen2.5 with Qwen3 RoPE theta - long context behavior MUST be validated.", "Post-conversion evaluation is highly recommended for numerical stability, quantization, and safety alignment."]}
|
136 |
with open(os.path.join(target_model_path, "conversion_metadata.json"), "w") as f: json.dump(metadata, f, indent=2)
|
137 |
print(f"✅ Model saved to: {target_model_path}")
|
138 |
|
139 |
# --- 6. Final Validation ---
|
140 |
+
del s_model, d_model, s_state_dict, d_state_dict, new_state_dict, t_model
|
141 |
torch.cuda.empty_cache()
|
142 |
validate_model(target_model_path)
|
143 |
|
144 |
if __name__ == "__main__":
|
145 |
+
convert_qwen2_to_qwen3_decoupled()
|