Commit d709cdc
Parent(s): 814fa89
Update model initialization: add device parameter support and change the default value of device_map to None, for greater flexibility and compatibility.
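Why the None default matters: with a truthy default such as "auto", model loading would always route through accelerate's automatic placement, and an explicitly requested device like "mps" could never take effect. Below is a minimal sketch of the intended resolution order, using a hypothetical load_causal_lm helper rather than the repository's actual loading code:

from typing import Optional

from transformers import AutoModelForCausalLM

def load_causal_lm(model_name: str,
                   device: Optional[str] = None,
                   device_map: Optional[str] = None):
    """Hypothetical helper: use accelerate placement only when asked for."""
    if device_map is not None:
        # Opt-in path: let accelerate place/shard the weights (e.g. "auto").
        return AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    if device is not None:
        # Explicit path: move the whole model to one device, e.g. "mps".
        model = model.to(device)
    return model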
examples/simple_llm.py
CHANGED
@@ -16,15 +16,16 @@ if __name__ == "__main__":
     try:
         model_name = "google/gemma-3-4b-it"
         use_4bit_quantization = False
+        device = "mps"
 
         # gemma_chat = GemmaMLXChatCompletion(model_name="mlx-community/gemma-3-12b-it-4bit-DWQ")
         # Alternatively, if you have a smaller, faster model, you can try it, e.g. "mlx-community/gemma-2b-it-8bit"
         if model_name.startswith("mlx-community"):
             gemma_chat = GemmaMLXChatCompletion(model_name=model_name)
         elif model_name.startswith("microsoft"):
-            gemma_chat = Phi4TransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization)
+            gemma_chat = Phi4TransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization, device=device)
         else:
-            gemma_chat = GemmaTransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization)
+            gemma_chat = GemmaTransformersChatCompletion(model_name=model_name, use_4bit_quantization=use_4bit_quantization, device=device)
 
         print("\n--- Example 1: simple user query ---")
         messages_example1 = [
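Note that the example hard-codes device = "mps", which only exists on Apple Silicon. A small fallback chain (a sketch, not part of this commit) keeps the script portable across machines:

import torch

# Prefer Apple's Metal backend, then CUDA, then plain CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"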
src/podcast_transcribe/llm/llm_base.py
CHANGED
@@ -182,7 +182,7 @@ class TransformersBaseChatCompletion(BaseChatCompletion):
         self,
         model_name: str,
         use_4bit_quantization: bool = False,
-        device_map: Optional[str] = "auto",
+        device_map: Optional[str] = None,
         device: Optional[str] = None,
         trust_remote_code: bool = True,
         torch_dtype: Optional[torch.dtype] = None
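For orientation, constructor arguments like these are typically forwarded to transformers' from_pretrained. The following is a hedged sketch under that assumption; build_from_pretrained_kwargs is a hypothetical name, not code from llm_base.py:

from typing import Any, Dict, Optional

import torch
from transformers import BitsAndBytesConfig

def build_from_pretrained_kwargs(
    use_4bit_quantization: bool = False,
    device_map: Optional[str] = None,
    torch_dtype: Optional[torch.dtype] = None,
    trust_remote_code: bool = True,
) -> Dict[str, Any]:
    kwargs: Dict[str, Any] = {"trust_remote_code": trust_remote_code}
    if torch_dtype is not None:
        kwargs["torch_dtype"] = torch_dtype
    if use_4bit_quantization:
        # bitsandbytes 4-bit loading targets CUDA; it will not pair with device="mps".
        kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
    if device_map is not None:
        # Forward device_map only when requested; the new default is None.
        kwargs["device_map"] = device_map
    return kwargs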
src/podcast_transcribe/llm/llm_gemma_transfomers.py
CHANGED
@@ -11,7 +11,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
         self,
         model_name: str = "google/gemma-3-4b-it",
         use_4bit_quantization: bool = False,
-        device_map: Optional[str] = "auto",
+        device_map: Optional[str] = None,
         device: Optional[str] = None,
         trust_remote_code: bool = True
     ):
src/podcast_transcribe/llm/llm_phi4_transfomers.py
CHANGED
@@ -11,7 +11,7 @@ class Phi4TransformersChatCompletion(TransformersBaseChatCompletion):
         self,
         model_name: str = "microsoft/Phi-4-mini-reasoning",
         use_4bit_quantization: bool = False,
-        device_map: Optional[str] = "auto",
+        device_map: Optional[str] = None,
         device: Optional[str] = None,
         trust_remote_code: bool = True
     ):
src/podcast_transcribe/llm/llm_router.py
CHANGED
@@ -379,7 +379,7 @@ def chat_completion(
     model: Optional[str] = None,
     device: Optional[str] = None,
     use_4bit_quantization: bool = False,
-    device_map: Optional[str] = "auto",
+    device_map: Optional[str] = None,
     trust_remote_code: bool = True,
     **kwargs
 ) -> Dict[str, Any]:
@@ -448,7 +448,7 @@ def chat_completion(
         params["device"] = device
     if use_4bit_quantization:
         params["use_4bit_quantization"] = use_4bit_quantization
-    if device_map != "auto":
+    if device_map:
         params["device_map"] = device_map
     if not trust_remote_code:
         params["trust_remote_code"] = trust_remote_code
@@ -473,7 +473,7 @@ def reasoning_completion(
     model: Optional[str] = None,
     device: Optional[str] = None,
     use_4bit_quantization: bool = False,
-    device_map: Optional[str] = "auto",
+    device_map: Optional[str] = None,
     trust_remote_code: bool = True,
     extract_reasoning_steps: bool = True,
     **kwargs
@@ -521,7 +521,7 @@ def reasoning_completion(
         params["device"] = device
     if use_4bit_quantization:
         params["use_4bit_quantization"] = use_4bit_quantization
-    if device_map != "auto":
+    if device_map:
         params["device_map"] = device_map
     if not trust_remote_code:
         params["trust_remote_code"] = trust_remote_code
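With the guard changed to a plain truthiness check, the routers only forward device_map when a caller actually sets it. A hypothetical invocation follows; only model, device, use_4bit_quantization, device_map, and trust_remote_code are visible in this diff, so the messages argument below is an assumption about the unshown part of the signature:

from podcast_transcribe.llm.llm_router import chat_completion

response = chat_completion(
    messages=[{"role": "user", "content": "Summarize this episode."}],  # assumed parameter
    model="google/gemma-3-4b-it",
    device="mps",  # wired through to the backend via params["device"]
    # device_map omitted: with the new None default it is simply not forwarded
)
print(response)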