bobpopboom committed on
Commit 8c068ee · verified · 1 Parent(s): b84cd4b

ok ill do it myself then

Files changed (1): app.py (+21, -17)
app.py CHANGED
@@ -5,14 +5,29 @@ import torch
 model_id = "thrishala/mental_health_chatbot"
 
 try:
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        load_in_8bit=True,
-        device_map="auto",
-        torch_dtype=torch.float16
+        device_map="cpu",
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True,
+        max_memory={"cpu": "15GB"},
+        offload_folder="offload",
     )
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.model_max_length = 512  # Set maximum length
+
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        torch_dtype=torch.float16,
+        num_return_sequences=1,
+        do_sample=False,
+        truncation=True,
+        max_new_tokens=128
+    )
+
 except Exception as e:
     print(f"Error loading model: {e}")
     exit()
@@ -22,8 +37,6 @@ def respond(
     history,
     system_message,
     max_tokens,
-    temperature,
-    top_p,
 ):
     # Construct the prompt with clear separation
     prompt = f"{system_message}\n"
@@ -35,8 +48,7 @@ def respond(
     response = pipe(
         prompt,
         max_new_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
+        do_sample=False,
         eos_token_id=tokenizer.eos_token_id,  # Use EOS token to stop generation
     )[0]["generated_text"]
 
@@ -55,14 +67,6 @@ demo = gr.ChatInterface(
             label="System message",
         ),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
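
A minimal sketch of how the generation path introduced by this commit could be exercised, assuming `pipe` and `tokenizer` were built exactly as in the try-block above; the example prompt and the prompt-stripping step are illustrative assumptions, not code from this Space:

# Sketch only: assumes `pipe` and `tokenizer` exist as loaded in this commit.
prompt = (
    "You are a supportive mental health assistant.\n"  # illustrative system message
    "User: I have trouble sleeping lately.\nAssistant:"
)
result = pipe(
    prompt,
    max_new_tokens=128,
    do_sample=False,                      # greedy decoding, matching the commit
    eos_token_id=tokenizer.eos_token_id,  # stop at the end-of-sequence token
)[0]["generated_text"]
print(result[len(prompt):].strip())       # keep only the newly generated reply

Because do_sample=False selects greedy decoding, the removed temperature and top-p sliders would have had no effect, which is consistent with dropping them from the Gradio interface.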