jyo01 committed
Commit 98cafe0 · verified · 1 Parent(s): 14a374c

Update app.py

Files changed (1): app.py +40 -12
app.py CHANGED
@@ -101,26 +101,53 @@ def generate_prompt(query: str, context_snippets: list) -> str:
     )
     return prompt
 
-def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-hf", max_new_tokens: int = None) -> str:
-    if max_new_tokens is None:
-        max_new_tokens = 1024 if is_detailed_query(prompt) else 256
+# def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-hf", max_new_tokens: int = None) -> str:
+#     if max_new_tokens is None:
+#         max_new_tokens = 1024 if is_detailed_query(prompt) else 256
 
-    torch.cuda.empty_cache()
+#     torch.cuda.empty_cache()
+
+#     if not os.path.exists("offload"):
+#         os.makedirs("offload")
+
+#     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
+#     model = AutoModelForCausalLM.from_pretrained(
+#         model_name,
+#         device_map="auto",
+#         offload_folder="offload",  # Specify the folder where weights will be offloaded
+#         use_safetensors=False,
+#         trust_remote_code=True,
+#         torch_dtype=torch.float16,
+#         token=HF_TOKEN
+#     )
 
-    if not os.path.exists("offload"):
-        os.makedirs("offload")
 
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
+#     text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
+#     outputs = text_gen(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7)
+#     full_response = outputs[0]['generated_text']
+
+#     marker = "Answer:"
+#     if marker in full_response:
+#         answer = full_response.split(marker, 1)[1].strip()
+#     else:
+#         answer = full_response.strip()
+
+#     return answer
+
+def get_llm_response(prompt: str, model_name: str = "EleutherAI/gpt-neo-125M", max_new_tokens: int = None) -> str:
+    if max_new_tokens is None:
+        max_new_tokens = 256  # You can adjust this value as needed.
+
+    torch.cuda.empty_cache()
+
+    # Load the tokenizer and model for GPT-Neo 125M.
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="auto",
-        offload_folder="offload",  # Specify the folder where weights will be offloaded
         use_safetensors=False,
-        trust_remote_code=True,
-        torch_dtype=torch.float16,
-        token=HF_TOKEN
+        torch_dtype=torch.float32  # Using default precision since model is small.
     )
-
 
     text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
     outputs = text_gen(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7)
@@ -134,6 +161,7 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-hf", max_new_tokens: int = None) -> str:
 
     return answer
 
+
 ############################################
 # Gradio Interface Functions
 ############################################
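
For reference, the snippet below is a minimal standalone sketch of the code path this commit switches to: it loads EleutherAI/gpt-neo-125M with transformers, generates with the same do_sample/temperature settings, and splits on the "Answer:" marker the way get_llm_response does. The example prompt and the max_new_tokens value of 64 are illustrative and not taken from app.py.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Plain float32 load; app.py additionally passes device_map="auto" and
# use_safetensors=False, which are omitted here to keep the sketch dependency-free.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)

text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Hypothetical prompt; in app.py the prompt comes from generate_prompt().
prompt = "Context: Gradio builds web UIs for Python functions.\nQuestion: What is Gradio?\nAnswer:"
outputs = text_gen(prompt, max_new_tokens=64, do_sample=True, temperature=0.7)
full_response = outputs[0]["generated_text"]

# Keep only the text after the "Answer:" marker, mirroring get_llm_response.
marker = "Answer:"
answer = full_response.split(marker, 1)[1].strip() if marker in full_response else full_response.strip()
print(answer)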