chen666-666 committed on
Commit
0378c00
·
1 Parent(s): 4fab3f7

add app.py and requirements.txt

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -17,20 +17,21 @@ bert_ner_pipeline = pipeline("ner", model=bert_ner_model, tokenizer=bert_tokeniz
17
  chatglm_model, chatglm_tokenizer = None, None
18
  use_chatglm = False
19
  try:
20
- # 强制使用CPU加载ChatGLM
21
- chatglm_model_name = "THUDM/chatglm3-6b"
22
- chatglm_tokenizer = AutoTokenizer.from_pretrained(chatglm_model_name, trust_remote_code=True)
 
 
23
  chatglm_model = AutoModel.from_pretrained(
24
  chatglm_model_name,
25
  trust_remote_code=True,
26
- device_map="cpu", # 强制使用CPU
27
- torch_dtype=torch.float32, # 使用FP32精度
28
- low_cpu_mem_usage=True # 优化内存使用
29
  ).eval()
30
  use_chatglm = True
31
- print("✅ ChatGLM 已加载到 CPU")
32
  except Exception as e:
33
- print(f"❌ ChatGLM 加载失败: {e}")
34
 
35
  # ======================== 知识图谱结构 ========================
36
  knowledge_graph = {"entities": set(), "relations": set()}
 
17
  chatglm_model, chatglm_tokenizer = None, None
18
  use_chatglm = False
19
  try:
20
+ chatglm_model_name = "THUDM/chatglm-6b-int4" # 4-bit量化版本
21
+ chatglm_tokenizer = AutoTokenizer.from_pretrained(
22
+ chatglm_model_name,
23
+ trust_remote_code=True
24
+ )
25
  chatglm_model = AutoModel.from_pretrained(
26
  chatglm_model_name,
27
  trust_remote_code=True,
28
+ device_map="cpu",
29
+ torch_dtype=torch.float32 # 必须使用float32
 
30
  ).eval()
31
  use_chatglm = True
32
+ print("✅ 4-bit量化版ChatGLM加载成功(需6GB内存)")
33
  except Exception as e:
34
+ print(f"❌ 量化模型加载失败: {e}")
35
 
36
  # ======================== 知识图谱结构 ========================
37
  knowledge_graph = {"entities": set(), "relations": set()}