fthor committed on
Commit
3ac1ccb
·
1 Parent(s): 854f0cf

added flash_attention

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. requirements.txt +1 -0
app.py CHANGED
@@ -18,7 +18,9 @@ processor = AutoProcessor.from_pretrained(model_id)
18
  model = LlavaForConditionalGeneration.from_pretrained(
19
  model_id,
20
  quantization_config=quantization_config,
21
- device_map="auto"
 
 
22
  )
23
 
24
 
 
18
  model = LlavaForConditionalGeneration.from_pretrained(
19
  model_id,
20
  quantization_config=quantization_config,
21
+ device_map="auto",
22
+ use_flash_attention_2=True,
23
+ low_cpu_mem_usage=True
24
  )
25
 
26
 
requirements.txt CHANGED
@@ -150,3 +150,4 @@ webencodings==0.5.1
150
  websocket-client==1.7.0
151
  websockets==11.0.3
152
  widgetsnbextension==4.0.9
 
 
150
  websocket-client==1.7.0
151
  websockets==11.0.3
152
  widgetsnbextension==4.0.9
153
+ sentence_transformers