david-thrower committed
Commit 602010e · verified · 1 Parent(s): 2bb4d5a

Update app.py


Added garbage collection after quantization and generation.

Files changed (1)
  1. app.py +7 -0
app.py CHANGED
@@ -1,3 +1,6 @@
+
+import gc
+
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, HqqConfig
@@ -20,6 +23,8 @@ model =\
     quantization_config=quant_config
 ).to(DEVICE)
 
+gc.collect()
+
 #########
 
 # print("Loading tokenizer & model…")
@@ -73,6 +78,7 @@ def chat_fn(history, enable_thinking, temperature, top_p, top_k, repetition_pena
     # xml_tools=TOOLS
 )
 inputs = tokenizer(text, return_tensors="pt")
+gc.collect()
 with torch.inference_mode():
     streamer = model.generate(
         **inputs,
@@ -85,6 +91,7 @@ def chat_fn(history, enable_thinking, temperature, top_p, top_k, repetition_pena
     pad_token_id=tokenizer.eos_token_id,
     streamer=None  # we'll yield manually
 )
+gc.collect()
 output_ids = streamer[0][len(inputs.input_ids[0]):]
 response = tokenizer.decode(output_ids, skip_special_tokens=True)
 if isinstance(response, str):
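For reference, here is a minimal, self-contained sketch of the pattern this commit applies: explicit gc.collect() calls right after loading and quantizing the model, and around generation, so unreferenced temporaries are reclaimed promptly. The MODEL_ID and HqqConfig values are placeholders, not the Space's actual settings, and the torch.cuda.empty_cache() call is a common companion on GPU that is not part of this commit.

    import gc

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, HqqConfig

    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    MODEL_ID = "org/model"  # placeholder: not the model used by this Space

    # Example HQQ settings; the commit does not show the Space's actual config.
    quant_config = HqqConfig(nbits=4, group_size=64)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=quant_config,
    ).to(DEVICE)

    # As in the commit: collect right after quantization, so temporaries
    # from loading and quantizing the checkpoint are freed promptly.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()  # assumption: companion call, not in the commit

    prompt = "Hello"
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    gc.collect()  # as in the commit: collect before generation
    with torch.inference_mode():
        output_ids = model.generate(**inputs, max_new_tokens=32)
    gc.collect()  # as in the commit: collect again after generation

    new_tokens = output_ids[0][len(inputs.input_ids[0]):]
    print(tokenizer.decode(new_tokens, skip_special_tokens=True))

Note that gc.collect() only reclaims Python-level garbage; on CUDA the caching allocator keeps freed blocks reserved, which is why torch.cuda.empty_cache() is sometimes paired with it when running on GPU.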