ehartford committed
Commit 3bd1be9 · verified · 1 Parent(s): 13bd737

Upload folder using huggingface_hub

config_diff.json CHANGED
@@ -1,9 +1,5 @@
 {
   "changed": {
-    "model_type": {
-      "from": "qwen2",
-      "to": "qwen3"
-    },
     "vocab_size": {
       "from": 152064,
       "to": 151936
@@ -12,10 +8,6 @@
       "from": "Qwen/Qwen2.5-72B-Instruct",
       "to": ""
     },
-    "bos_token_id": {
-      "from": 151643,
-      "to": null
-    },
     "tie_word_embeddings": {
       "from": false,
       "to": true
@@ -24,6 +16,10 @@
       "from": 151645,
       "to": null
     },
+    "model_type": {
+      "from": "qwen2",
+      "to": "qwen3"
+    },
     "architectures": {
       "from": [
         "Qwen2ForCausalLM"
@@ -31,6 +27,10 @@
       "to": [
         "Qwen3ForCausalLM"
       ]
+    },
+    "bos_token_id": {
+      "from": 151643,
+      "to": null
     }
   },
   "added": {
conversion_metadata.json ADDED
@@ -0,0 +1,10 @@
+{
+  "conversion_date_utc": "2025-06-10T17:39:14.457747",
+  "source_model": "Qwen/Qwen2.5-72B-Instruct",
+  "donor_model": "Qwen/Qwen3-32B",
+  "warnings": [
+    "This is a community-created model merge. Its behavior may be unpredictable.",
+    "Sliding window config inherited from Qwen2.5 with Qwen3 RoPE theta - long context behavior MUST be validated.",
+    "Post-conversion evaluation is highly recommended for numerical stability, quantization, and safety alignment."
+  ]
+}
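Because conversion_metadata.json ships with the model, downstream code can surface these warnings before the checkpoint is used. A minimal sketch, assuming the repository has been downloaded to a local directory (the path below is a placeholder):

# Minimal sketch: surface the conversion warnings before loading the model.
# "./Qwen3-72B-Instruct" is a placeholder for wherever the repo was downloaded.
import json
import os

model_dir = "./Qwen3-72B-Instruct"
with open(os.path.join(model_dir, "conversion_metadata.json")) as f:
    meta = json.load(f)

print(f"Converted from {meta['source_model']} using donor {meta['donor_model']}")
for warning in meta.get("warnings", []):
    print(f"  ⚠ {warning}")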
model-00001-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5c87bd7774dc08a98b2c164cfaca0996191acbc3c26540247d124e15497a73a
+oid sha256:6ebe119ba44e78c9050dbab69114c8f4834181422f92407f335be99b3bfed912
 size 4546661424
model-00002-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffcd7ab15b09139a24d7654b21cd4b9c69278763e352aad92f969cd46642f5fa
+oid sha256:4ddddaf2fe9205022e521f60e4cfa6d23c7dbd2e4c3b1f013922eee3b4402a3f
 size 4964061232
model-00003-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:270771240d32d0ba80eef822b5e2dae77d93caea1f84490a5785a7f8525e2b2d
+oid sha256:0bc9ec6b8cc2f15b94365c6824047bcac3538e70c814fb3c74f314ed49dc2095
 size 4781577096
model-00004-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a1b72f211aaaa2ffa7c16ea1a0d37a256bdcb0ba415289f323fcd760270d7d9
+oid sha256:5e0088c1b4df8a82118246d2d5d7847551e3990513b25ecabcea9a944cb1516d
 size 4781610096
model-00005-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cfb54e3de8f1ae5a64151cfa7c25a6e06019a34063423b87ad5aed8d79beb9d
+oid sha256:db62306ab03ca84de2789e5c1db2f954ce1a6b13784940305a593d7c0203a598
 size 4781610128
model-00006-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7ed65d7eca8c82a4791db1b75832aa9fe373b75f09b1b35f8cddafb89f4e036
+oid sha256:720b31a361a12aa6fea6449fb90f312b4156b97c7ac63afcdb39c21584a39867
 size 4964061264
model-00007-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31f78b85a609f805ebe4290cf9d750456daa5ffc0a7d5a41dd97ddf2a964a9d
+oid sha256:97545eafa4a41817faea3a9471e73c5be245ea29e10cae7fd11736d749c2e285
 size 4781577128
model-00008-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38c67c566ea3bbf10c08d2df63afb11acd5cf39ee00b7e9484dacf0f0e79710a
+oid sha256:1a6d96fa787d0fb1eba7e84af0ae6f78301552b4676c79f010ba59c8c9667d30
 size 4781610128
model-00009-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31f8003f72913bd3bacbaaf8383be93b7f40a2f7e5f6190d325e44c3af1c2e02
+oid sha256:502e80229011cefa3fe391e2b8b500656fae9692aa841846759ee258565c7508
 size 4781610128
model-00010-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10bd214214221ce8295fc1687d2d0fccbd6d11e9946f0d05ad4915c3ff2f4254
+oid sha256:753bca5be6b1f6e0042b5f42a33caff3936ad4d435b89bb9894635f65629c208
 size 4964061264
model-00011-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a46f1a789145e670afd86bf61334cb45d03e2fcd70d6a5c7d2a3a40deb60216
+oid sha256:f428d399a792b9c179d8845bef256441696ed64a8c0e20880f4d62f4faf2515b
 size 4781577128
model-00012-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7c6b95eaf5b23a33355425b3df730be9e95927975d5b3e4be2cdd79502586bf
+oid sha256:78197a393251db752c94812c92f781c338ead23cce2a971b79a5e18139c95196
 size 4781610128
model-00013-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:200c159cc8f4806a0f389f6cb8e82abba3dcf7914bf2681334e60a2b1c3cfe31
+oid sha256:86330a1824a777c2105e6fe19c81b11e17e63446726f2c138b34dc1de5999493
 size 4781610128
model-00014-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1c9a27aaa954d10fe2998fe3d9fcd2c0eab000eca497159d58c63d9fa10ddc3
+oid sha256:3362dc363af862d300f443002d16e29b3a84ac794e1c711f86e2fe25ca973d69
 size 4964061264
model-00015-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:173c88345a156c20e200be235556edb4e29495c7075196f4443ca9de1287916c
+oid sha256:d3e4bd9b0f0b87fd3d8c71dcbf357aaf7a656d9d9c851a1e2d93f9542567a957
 size 4781577128
model-00016-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3074880772c728570a99e0286417ea440d6bd9c5795ef60328531deeaf41870
+oid sha256:8b446fa6f25cdb304c18d060653a75638b2030dae18e6b705c9365beb260a271
 size 4781610128
model-00017-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c27b08d5d44471c784e33f2944d5b69e8b950bd1822e23165f75520410802a1
+oid sha256:281a9dcbb30b975a56b26d5d37fbfaf82a6d57fad6be5362c26d8a5f9759572e
 size 4781610128
model-00018-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a231baedea1b49219c450bdc95207f4b555ff7599a14133f0fc27d7340817a4
+oid sha256:5c79648f7404a3ec396dc0f3a318b9910ae97b64735a53505a2232be960fef15
 size 4964061264
model-00019-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63065018deb37f61e72410afce3484592d4af3951c2ebb8251628e94eb9c6e4f
+oid sha256:4141b7e5b83b531706dc7850ba99ea92ede5e45835b9b82f911c39a88bee4c30
 size 4781577128
model-00020-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb1225790f1ba1da0bbe2a24f896240f610d75faa23703ec9ef2cb4b1a362cbc
+oid sha256:549c338b690fac9f20dbd51166e9d7ea4637c949f79ea213217668a9bbfa34ab
 size 4781610128
model-00021-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e850aaea0846d305c8a6abf544a4f63c0c35fc9028f187f10d93e0b5b636198
+oid sha256:9fe307829e356fc6ecead37cdad243cf3fdd832f92219e1c2d77053b4b975d26
 size 4781610128
model-00022-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:222aa197a63dc450d912b6f34bc993ccfb3cf3e637427226aaf164ffa669606e
+oid sha256:33ba2a5a0633406992b0931fa97848ac04b0e744110b21e75da9422cd4e51ec8
 size 4964061264
model-00023-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff80f4e57fbb429fe116c66b14043098e51b55480a8e5b9f095148322e000182
+oid sha256:c338036e51aac0f2aa8810ffab9392dbba87e9d5d56b1ac641e2379cb1046b97
 size 4781577128
model-00024-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3265be553586f3be7dc41756370d06bb9059c2b88afbf4e3ce6b3efd0c9c8327
+oid sha256:d68a1e351fbc3074f58b0bb6014f485a76796487400aae5b6540b5c24daf326a
 size 4781610128
model-00025-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a92b55701aee163cfee55e3fb19c798e705390b205fb397c2356897aee4e4c1
+oid sha256:c50e4f2b171237132b0349359974f26d9c6f3d03361046f5c260d22162ae2ac7
 size 4781610128
model-00026-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b80f8c6ac7894fc07526cd0a6248b92265251366f2d8df2dfdabdad048a4ac4
+oid sha256:cc179221ee6a2038c1def8fd8bc521bd5b8a8f42fae45c18d84fcab6075a976e
 size 4964061264
model-00027-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af1de5423acb6f99947bbcced145619a673b3412ac16f2283e25b489e7f5ff92
+oid sha256:0fcd6b1e73f0311bc965e0c2be822891a59c4ddc9979cb25edfcbb6753045e4b
 size 4781577128
model-00028-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69a3c8bfc19b7e94b095aa91c1be5088f07eebe4eb490d9411ba25e7aa081696
+oid sha256:1fcfb6e484c433364cb232a8755a6b37c83b8cd6e9a29f35af8686895cd349ba
 size 4781610128
model-00029-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:574be2d6d1d69ba58c34769798c76ff5f0f0a717d9118b62cdd57e262013a5c4
+oid sha256:e947e91e2494dd122a5cac3a3dbe4c5ac4bcb82ddd36e8cc99c3c69e95891e2f
 size 4781610128
model-00030-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd7a418d62bd41a7487963686b036f14051124a0d66f269c7efc8557b5afd62e
+oid sha256:0e4db59843cff2e9ec0b2b70a5ec1a3e2ccb9c9380d24e0e0bbb9592a437b067
 size 3208726960
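Each shard diff above changes only the Git LFS pointer (the sha256 oid); the shard sizes are unchanged. A downloaded shard can be checked against its pointer locally; a minimal sketch using the first shard's values from this commit (file paths are placeholders):

# Minimal sketch: verify a downloaded shard against its Git LFS pointer
# (sha256 oid and byte size). The path below is a placeholder.
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

shard = "model-00001-of-00031.safetensors"
expected_oid = "6ebe119ba44e78c9050dbab69114c8f4834181422f92407f335be99b3bfed912"
expected_size = 4546661424

assert os.path.getsize(shard) == expected_size, "size mismatch"
assert sha256_of(shard) == expected_oid, "sha256 mismatch"
print(f"{shard}: OK")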
qwen2to3.py CHANGED
@@ -1,16 +1,14 @@
-# file: convert_qwen2.5_to_qwen3_final_decoupled.py
-
 import torch
 import os
 import json
-import re
+import re  # <-- Import the regular expression module
 from datetime import datetime
 from tqdm import tqdm
 from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
 from transformers import Qwen3Config, Qwen3ForCausalLM
 from collections import Counter
 
-# --- Helper Functions (Unchanged) ---
+# --- Helper Functions (Definitive Version) ---
 
 def create_vocab_mapping(s_tok, t_tok):
     s_vocab, t_vocab = s_tok.get_vocab(), t_tok.get_vocab()
@@ -64,8 +62,8 @@ def validate_model(path):
         outputs = model.generate(**inputs, max_new_tokens=25, do_sample=False, pad_token_id=tokenizer.eos_token_id)
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         print(f"Generated Response: '{response}'")
-        assert len(response) > len(prompt) and "�" not in response, "Model generated no or garbled output."
-        print("\n ✓ Validation successful: Model loads and generates coherent text.")
+        assert len(response) > len(prompt), "Model did not generate new tokens."
+        print("\n ✓ Validation successful: Model loads and generates coherent text using standard transformers.")
     except Exception as e:
         print(f"\n ✗ Validation FAILED: {e}")
 
@@ -73,7 +71,7 @@ def validate_model(path):
 def convert_qwen2_to_qwen3_decoupled():
     source_model_id, donor_model_id = "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen3-32B"
     target_model_path = "./Qwen3-72B-Instruct"
-    print("Starting DECOUPLED conversion process (v5.4)...")
+    print("Starting DECOUPLED conversion process (v5.3)...")
 
     # --- 1. Pre-flight Checks ---
     print("\n[Step 1/6] Running pre-flight architectural checks...")
@@ -88,8 +86,7 @@ def convert_qwen2_to_qwen3_decoupled():
     print("\n[Step 2/6] Loading models & tokenizers using standard AutoClasses...")
     dtype = torch.bfloat16
     s_model = AutoModelForCausalLM.from_pretrained(source_model_id, torch_dtype=dtype, device_map="auto")
-    # Donor model is only needed for its config now, so we can load it with low_cpu_mem_usage
-    d_model = AutoModelForCausalLM.from_pretrained(donor_model_id, torch_dtype=dtype, low_cpu_mem_usage=True)
+    d_model = AutoModelForCausalLM.from_pretrained(donor_model_id, torch_dtype=dtype, device_map="auto")
     s_tokenizer = AutoTokenizer.from_pretrained(source_model_id)
     t_tokenizer = AutoTokenizer.from_pretrained(donor_model_id)
 
@@ -101,19 +98,25 @@ def convert_qwen2_to_qwen3_decoupled():
     # --- 4. Convert and Transfer Weights ---
     print("\n[Step 4/6] Converting weights (memory-safe)...")
     s_state_dict = {k: v.to('cpu', dtype=dtype) for k, v in tqdm(s_model.state_dict().items(), desc="Source state dict to CPU")}
+    d_state_dict = {k: v.to('cpu', dtype=dtype) for k, v in tqdm(d_model.state_dict().items(), desc="Donor state dict to CPU")}
 
     vocab_mapping = create_vocab_mapping(s_tokenizer, t_tokenizer)
     verify_special_tokens(s_tokenizer, t_tokenizer, vocab_mapping)
 
     new_state_dict = {}
-    head_dim = s_config.hidden_size // s_config.num_attention_heads
+    num_donor_layers = d_config.num_hidden_layers
 
     for key in tqdm(t_model.state_dict().keys(), desc="Transferring weights"):
-        if "q_norm.weight" in key or "k_norm.weight" in key:
-            # --- FINAL FIX: Initialize norm layers to a neutral state (all ones) ---
-            # This is the safe and correct way to handle grafting normalization layers
-            # between models of different scales.
-            new_state_dict[key] = torch.ones(head_dim, dtype=dtype)
+        if "q_norm" in key or "k_norm" in key:
+            # --- FIX: Implement Cyclical Grafting for Norm Layers ---
+            match = re.search(r'layers\.(\d+)\.', key)
+            if match:
+                target_layer_idx = int(match.group(1))
+                donor_layer_idx = target_layer_idx % num_donor_layers
+                donor_key = key.replace(f'layers.{target_layer_idx}.', f'layers.{donor_layer_idx}.')
+                new_state_dict[key] = d_state_dict[donor_key].clone()
+            else:
+                print(f" ⚠️ Could not parse layer index for norm key: {key}. Skipping.")
         elif "model.embed_tokens.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
         elif "lm_head.weight" in key: new_state_dict[key] = create_hybrid_matrix(s_state_dict[key], vocab_mapping, (t_config.vocab_size, t_config.hidden_size))
         elif key in s_state_dict: new_state_dict[key] = s_state_dict[key].clone()
@@ -128,15 +131,15 @@ def convert_qwen2_to_qwen3_decoupled():
     t_model.save_pretrained(target_model_path, safe_serialization=True)
     t_tokenizer.save_pretrained(target_model_path)
     save_config_diff(s_config, t_config, target_model_path)
-    metadata = {"conversion_date_utc": datetime.now(datetime.UTC).isoformat(), "source_model": source_model_id, "donor_model": donor_model_id,
-                "warnings": ["This is a community-created model merge. Its behavior may be unpredictable.", "q_norm/k_norm layers were initialized to ones, not grafted. This is safer but may require fine-tuning to optimize.", "Sliding window config inherited from Qwen2.5 with Qwen3 RoPE theta - long context behavior MUST be validated.", "Post-conversion evaluation is highly recommended for numerical stability, quantization, and safety alignment."]}
+    metadata = {"conversion_date_utc": datetime.utcnow().isoformat(), "source_model": source_model_id, "donor_model": donor_model_id,
+                "warnings": ["This is a community-created model merge. Its behavior may be unpredictable.", "Sliding window config inherited from Qwen2.5 with Qwen3 RoPE theta - long context behavior MUST be validated.", "Post-conversion evaluation is highly recommended for numerical stability, quantization, and safety alignment."]}
     with open(os.path.join(target_model_path, "conversion_metadata.json"), "w") as f: json.dump(metadata, f, indent=2)
     print(f"✅ Model saved to: {target_model_path}")
 
     # --- 6. Final Validation ---
-    del s_model, d_model, s_state_dict, t_model, new_state_dict
+    del s_model, d_model, s_state_dict, d_state_dict, new_state_dict, t_model
    torch.cuda.empty_cache()
     validate_model(target_model_path)
 
 if __name__ == "__main__":
-    convert_qwen2_to_qwen3_decoupled()
+    convert_qwen2_to_qwen3_decoupled()
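The substantive change in this revision of qwen2to3.py is that q_norm/k_norm weights are now grafted cyclically from the Qwen3-32B donor (indexed modulo the donor's layer count) instead of being initialized to ones, with import re and the donor state dict reinstated to support that. A minimal, self-contained sketch of the index mapping; the layer counts below are illustrative only, since the real script reads num_hidden_layers from the configs at runtime:

# Minimal sketch of the cyclical layer-index mapping used for q_norm/k_norm grafting.
# Layer counts are illustrative; qwen2to3.py takes them from the configs at runtime.
num_target_layers = 80   # e.g. the Qwen2.5-72B-derived target
num_donor_layers = 64    # e.g. the Qwen3-32B donor

mapping = {t: t % num_donor_layers for t in range(num_target_layers)}

# Target layers 0..63 copy their norms from donor layers 0..63;
# target layers 64..79 wrap around to donor layers 0..15.
assert mapping[0] == 0 and mapping[63] == 63
assert mapping[64] == 0 and mapping[79] == 15
print(mapping[70])  # -> 6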