isayahc committed on
Commit 10e2a26 · 1 Parent(s): 2ccbf4d

using quantized model to try to fix memory issue

Files changed (1)
  1. app.py +2 -1
app.py CHANGED
@@ -32,7 +32,8 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 embeddings = HuggingFaceHubEmbeddings()
 
-model_id = "HuggingFaceH4/zephyr-7b-beta"
+model_id = "TheBloke/zephyr-7B-beta-GGUF"
+# model_id = "HuggingFaceH4/zephyr-7b-beta"
 # model_id = "meta-llama/Llama-2-7b-chat-hf"
 
 # model = AutoModelForCausalLM.from_pretrained(
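
Note that a GGUF repository such as TheBloke/zephyr-7B-beta-GGUF is not loadable through transformers' AutoModelForCausalLM; it needs a GGUF-aware runtime. A minimal sketch of how the quantized model might be loaded, assuming the ctransformers library and the Q4_K_M file name commonly used in TheBloke's repos (both are assumptions, not part of this commit):

# Sketch only: assumes `pip install ctransformers`; the GGUF file name is a guess
# based on TheBloke's usual naming and should be checked against the repo contents.
from ctransformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/zephyr-7B-beta-GGUF",
    model_file="zephyr-7b-beta.Q4_K_M.gguf",  # 4-bit quantization keeps memory use low
    model_type="mistral",                     # Zephyr is a Mistral fine-tune
    gpu_layers=0,                             # CPU-only; raise if a GPU is available
)

print(model("What does GGUF quantization change?", max_new_tokens=64))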