Kazilsky committed on
Commit
2864cb2
·
verified ·
1 Parent(s): 055cf9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -4,24 +4,28 @@ import torch
4
  import gradio as gr
5
 
6
  # --- Конфиг ---
7
- MODEL_ID = "deepseek-ai/DeepSeek-V3-0324"
 
 
 
 
8
 
9
- # --- Загрузка модели ---
10
  @spaces.GPU
11
  def load_model():
12
- model = AutoModelForCausalLM.from_pretrained(
13
- MODEL_ID,
14
- device_map="auto",
15
- torch_dtype=torch.bfloat16, # Оптимально для T4/A10G
16
- trust_remote_code=True
17
- )
18
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 
19
  return model, tokenizer
20
 
21
- model, tokenizer = load_model()
22
-
23
  # --- Генерация ---
24
  def generate(prompt: str) -> str:
 
25
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
26
  outputs = model.generate(
27
  **inputs,
@@ -36,4 +40,4 @@ with gr.Blocks() as demo:
36
  gr.ChatInterface(generate)
37
 
38
  if __name__ == "__main__":
39
- demo.launch()
 
4
  import gradio as gr
5
 
6
  # --- Конфиг ---
7
+ MODEL_ID = "deepseek-ai/deepseek-llm-7b"
8
+
9
+ # --- Загрузка модели (ленивая) ---
10
+ model = None
11
+ tokenizer = None
12
 
 
13
  @spaces.GPU
14
  def load_model():
15
+ global model, tokenizer
16
+ if model is None:
17
+ model = AutoModelForCausalLM.from_pretrained(
18
+ MODEL_ID,
19
+ device_map="auto",
20
+ torch_dtype=torch.bfloat16,
21
+ trust_remote_code=True
22
+ )
23
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
24
  return model, tokenizer
25
 
 
 
26
  # --- Генерация ---
27
  def generate(prompt: str) -> str:
28
+ model, tokenizer = load_model() # Загружаем только при первом вызове
29
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
30
  outputs = model.generate(
31
  **inputs,
 
40
  gr.ChatInterface(generate)
41
 
42
  if __name__ == "__main__":
43
+ demo.launch()