Spaces:
Running
Running
laserbeam2045
committed on
Commit
·
32dbfef
1
Parent(s):
2fc7e1b
fix
Browse files
- app.py +3 -2
- requirements.txt +6 -2
app.py
CHANGED
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
|
12 |
app = FastAPI()
|
13 |
|
14 |
# モデルロード
|
15 |
-
model_name = "google/gemma-3-4b-it"
|
16 |
try:
|
17 |
logger.info(f"Loading model: {model_name}")
|
18 |
processor = AutoProcessor.from_pretrained(model_name, token=os.getenv("HF_TOKEN"))
|
@@ -21,7 +21,8 @@ try:
|
|
21 |
torch_dtype=torch.bfloat16,
|
22 |
device_map="auto",
|
23 |
token=os.getenv("HF_TOKEN"),
|
24 |
-
low_cpu_mem_usage=True
|
|
|
25 |
)
|
26 |
logger.info("Model loaded successfully")
|
27 |
except Exception as e:
|
|
|
12 |
app = FastAPI()
|
13 |
|
14 |
# モデルロード
|
15 |
+
model_name = "google/gemma-3-4b-it" # 軽量な2Bモデルに変更
|
16 |
try:
|
17 |
logger.info(f"Loading model: {model_name}")
|
18 |
processor = AutoProcessor.from_pretrained(model_name, token=os.getenv("HF_TOKEN"))
|
|
|
21 |
torch_dtype=torch.bfloat16,
|
22 |
device_map="auto",
|
23 |
token=os.getenv("HF_TOKEN"),
|
24 |
+
low_cpu_mem_usage=True,
|
25 |
+
load_in_4bit=True # 量子化でメモリ節約
|
26 |
)
|
27 |
logger.info("Model loaded successfully")
|
28 |
except Exception as e:
|
requirements.txt
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
huggingface_hub==0.25.2
|
2 |
-
torch
|
3 |
-
transformers
|
|
|
|
|
|
|
|
|
|
1 |
huggingface_hub==0.25.2
|
2 |
+
torch==2.1.0+cpu
|
3 |
+
transformers==4.44.2
|
4 |
+
bitsandbytes==0.42.0
|
5 |
+
accelerate==0.26.1
|
6 |
+
fastapi==0.115.0
|
7 |
+
uvicorn==0.30.6
|