konieshadow committed
Commit c8eae1a · Parent: 4c76364

Change gemma3 default model to 4b

src/podcast_transcribe/llm/llm_gemma_transfomers.py CHANGED
@@ -9,7 +9,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
 
     def __init__(
         self,
-        model_name: str = "google/gemma-3-12b-it",
+        model_name: str = "google/gemma-3-4b-it",
         use_4bit_quantization: bool = False,
         device_map: Optional[str] = "auto",
         device: Optional[str] = None,
@@ -36,7 +36,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
 
 # For backward compatibility, a simplified factory function is also provided
 def create_gemma_transformers_client(
-    model_name: str = "google/gemma-3-12b-it",
+    model_name: str = "google/gemma-3-4b-it",
     use_4bit_quantization: bool = False,
     device: Optional[str] = None,
     **kwargs
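
A minimal usage sketch of the changed factory (hypothetical call site; the import path is inferred from the file path, and the parameters follow the signature in the diff). After this commit, callers that omit model_name get the 4B instruct model, and the previous 12B behavior must be pinned explicitly:

from podcast_transcribe.llm.llm_gemma_transfomers import create_gemma_transformers_client

# New default: loads google/gemma-3-4b-it when model_name is omitted
client = create_gemma_transformers_client(device="cuda", use_4bit_quantization=True)

# Restore the old behavior by pinning the 12B model explicitly
client_12b = create_gemma_transformers_client(model_name="google/gemma-3-12b-it")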
src/podcast_transcribe/llm/llm_router.py CHANGED
@@ -36,7 +36,7 @@ class LLMRouter:
         "gemma-transformers": {
             "module_path": "llm_gemma_transfomers",
             "class_name": "GemmaTransformersChatCompletion",
-            "default_model": "google/gemma-3-12b-it",
+            "default_model": "google/gemma-3-4b-it",
             "supported_params": [
                 "model_name", "use_4bit_quantization", "device_map",
                 "device", "trust_remote_code"
@@ -416,7 +416,7 @@ def chat_completion(
         response = chat_completion(
             messages=[{"role": "user", "content": "你好"}],
             provider="gemma-transformers",
-            model="google/gemma-3-12b-it",
+            model="google/gemma-3-4b-it",
             device="cuda",
             use_4bit_quantization=True
         )
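
The registry change also moves the router's implicit default. A sketch assuming chat_completion falls back to the provider's registered default_model when no model is passed (an assumption based on the config key above, not confirmed by the diff):

from podcast_transcribe.llm.llm_router import chat_completion

# No model argument: assumed to resolve to the registered default_model,
# which is now google/gemma-3-4b-it
response = chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    provider="gemma-transformers",
    device="cuda",
)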
src/podcast_transcribe/transcriber.py CHANGED
@@ -61,13 +61,13 @@ class CombinedTranscriber:
         elif torch.cuda.is_available():
             device = "cuda"
             if not llm_model_name:
-                llm_model_name = "google/gemma-3-12b-it"
+                llm_model_name = "google/gemma-3-4b-it"
             if not llm_provider:
                 llm_provider = "gemma-transformers"
         else:
             device = "cpu"
             if not llm_model_name:
-                llm_model_name = "google/gemma-3-12b-it"
+                llm_model_name = "google/gemma-3-4b-it"
             if not llm_provider:
                 llm_provider = "gemma-transformers"
 
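
The transcriber hunk only swaps the fallback constant on both device paths. A standalone sketch of that selection logic (hypothetical helper; in the real code this runs inline, and the branch above the elif is not shown in the diff):

import torch

def resolve_llm_defaults(llm_model_name=None, llm_provider=None):
    # Both the CUDA and CPU paths now fall back to the 4B model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if not llm_model_name:
        llm_model_name = "google/gemma-3-4b-it"
    if not llm_provider:
        llm_provider = "gemma-transformers"
    return device, llm_model_name, llm_provider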