Commit c8eae1a
Parent(s): 4c76364

Change gemma3 default model to 4b
src/podcast_transcribe/llm/llm_gemma_transfomers.py
CHANGED
@@ -9,7 +9,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
 
     def __init__(
         self,
-        model_name: str = "google/gemma-3-
+        model_name: str = "google/gemma-3-4b-it",
         use_4bit_quantization: bool = False,
         device_map: Optional[str] = "auto",
         device: Optional[str] = None,
@@ -36,7 +36,7 @@ class GemmaTransformersChatCompletion(TransformersBaseChatCompletion):
 
 # To maintain backward compatibility, a simplified factory function is also provided
 def create_gemma_transformers_client(
-    model_name: str = "google/gemma-3-
+    model_name: str = "google/gemma-3-4b-it",
     use_4bit_quantization: bool = False,
     device: Optional[str] = None,
     **kwargs
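Net effect of this hunk: callers that omit model_name now load the 4B instruction-tuned checkpoint. A minimal usage sketch, assuming the package is importable as podcast_transcribe (the import path is inferred from the src/ layout, not shown in this diff):

# Sketch only: import path inferred from src/podcast_transcribe/llm/...
from podcast_transcribe.llm.llm_gemma_transfomers import (
    create_gemma_transformers_client,
)

# With the new default, omitting model_name resolves to "google/gemma-3-4b-it".
client = create_gemma_transformers_client(
    use_4bit_quantization=True,  # parameter from the signature above
    device="cuda",               # parameter from the signature above
)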
src/podcast_transcribe/llm/llm_router.py
CHANGED
@@ -36,7 +36,7 @@ class LLMRouter:
         "gemma-transformers": {
             "module_path": "llm_gemma_transfomers",
             "class_name": "GemmaTransformersChatCompletion",
-            "default_model": "google/gemma-3-
+            "default_model": "google/gemma-3-4b-it",
             "supported_params": [
                 "model_name", "use_4bit_quantization", "device_map",
                 "device", "trust_remote_code"
@@ -416,7 +416,7 @@ def chat_completion(
     response = chat_completion(
         messages=[{"role": "user", "content": "你好"}],
         provider="gemma-transformers",
-        model="google/gemma-3-
+        model="google/gemma-3-4b-it",
         device="cuda",
         use_4bit_quantization=True
     )
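The default_model entry is what the router falls back to when the caller passes no model. A minimal sketch of that fallback under an importlib-style dispatch (only the dict keys and values come from the hunk above; resolve_client and the package prefix are illustrative assumptions, not copied from llm_router.py):

import importlib
from typing import Any, Optional

# Registry entry mirrors the hunk above; the lookup logic below is an
# illustrative assumption, not the actual LLMRouter implementation.
PROVIDERS = {
    "gemma-transformers": {
        "module_path": "llm_gemma_transfomers",
        "class_name": "GemmaTransformersChatCompletion",
        "default_model": "google/gemma-3-4b-it",
    },
}

def resolve_client(provider: str, model: Optional[str] = None, **kwargs: Any):
    cfg = PROVIDERS[provider]
    # Assumed package prefix, inferred from the repo layout.
    module = importlib.import_module(f"podcast_transcribe.llm.{cfg['module_path']}")
    cls = getattr(module, cfg["class_name"])
    # Fall back to the (newly changed) default model when none is given.
    return cls(model_name=model or cfg["default_model"], **kwargs)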
src/podcast_transcribe/transcriber.py
CHANGED
@@ -61,13 +61,13 @@ class CombinedTranscriber:
         elif torch.cuda.is_available():
             device = "cuda"
             if not llm_model_name:
-                llm_model_name = "google/gemma-3-
+                llm_model_name = "google/gemma-3-4b-it"
             if not llm_provider:
                 llm_provider = "gemma-transformers"
         else:
             device = "cpu"
             if not llm_model_name:
-                llm_model_name = "google/gemma-3-
+                llm_model_name = "google/gemma-3-4b-it"
             if not llm_provider:
                 llm_provider = "gemma-transformers"
 
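Condensed, the logic these two hunks sit in now defaults to the 4B model on both the CUDA and CPU paths. A sketch, assuming the branch that precedes the elif checks Apple MPS (that first branch is not visible in this diff, so its contents here are a guess):

import torch

def _pick_device_and_llm_defaults(llm_model_name=None, llm_provider=None):
    # The diff starts at `elif`, so this first branch is an assumption.
    if torch.backends.mps.is_available():
        device = "mps"
    elif torch.cuda.is_available():
        device = "cuda"
        if not llm_model_name:
            llm_model_name = "google/gemma-3-4b-it"  # new default (this commit)
        if not llm_provider:
            llm_provider = "gemma-transformers"
    else:
        device = "cpu"
        if not llm_model_name:
            llm_model_name = "google/gemma-3-4b-it"  # new default (this commit)
        if not llm_provider:
            llm_provider = "gemma-transformers"
    return device, llm_model_name, llm_provider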