Update app.py
app.py CHANGED
@@ -39,41 +39,41 @@ def create_llm():
     """Initialize the language model with optimized parameters"""
 
 
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
-                                                 # quantization_config=bnb_config
-                                                 )
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    # bnb_config = BitsAndBytesConfig(
+    #     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+    # )
 
-    terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
+    #                                              # quantization_config=bnb_config
+    #                                              )
+    # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
-    text_generation_pipeline = pipeline(
-        model=model,
-        tokenizer=tokenizer,
-        task="text-generation",
-        temperature=0.2,
-        do_sample=True,
-        repetition_penalty=1.1,
-        return_full_text=False,
-        max_new_tokens=200,
-        eos_token_id=terminators,
-    )
+    # terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
 
-
-
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    # text_generation_pipeline = pipeline(
+    #     model=model,
+    #     tokenizer=tokenizer,
+    #     task="text-generation",
+    #     temperature=0.2,
+    #     do_sample=True,
+    #     repetition_penalty=1.1,
+    #     return_full_text=False,
+    #     max_new_tokens=200,
+    #     eos_token_id=terminators,
     # )
-
+
+    # llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+
+    return HuggingFaceHub(
+        repo_id=MODEL_NAME,
+        model_kwargs={
+            "temperature": 0.7,  # Balanced between creativity and accuracy
+            "max_new_tokens": 1024,
+            "top_p": 0.9,
+            "frequency_penalty": 0.5
+        }
+    )
+    # return llm
 
 # Improved prompt template with better context handling and response structure
 PROMPT_TEMPLATE = """
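For context, the commit swaps the locally loaded, 4-bit-quantized model and its transformers pipeline (now commented out) for LangChain's HuggingFaceHub wrapper, which delegates generation to the hosted Inference API. Below is a minimal usage sketch, not part of the commit: it assumes the classic langchain.llms import path, a HUGGINGFACEHUB_API_TOKEN set in the environment, and a hypothetical repo id, since the real MODEL_NAME is defined elsewhere in app.py.

# Usage sketch only; assumptions: classic LangChain import path, a valid
# HUGGINGFACEHUB_API_TOKEN in the environment, and a placeholder repo id.
from langchain.llms import HuggingFaceHub

MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"  # hypothetical; app.py defines its own MODEL_NAME

def create_llm():
    # Same shape as the committed code: generation runs on the hosted
    # Inference API rather than a locally loaded quantized model.
    return HuggingFaceHub(
        repo_id=MODEL_NAME,
        model_kwargs={
            "temperature": 0.7,
            "max_new_tokens": 1024,
            "top_p": 0.9,
            "frequency_penalty": 0.5,
        },
    )

llm = create_llm()
# Classic LangChain call style; newer versions use llm.invoke(...)
print(llm("Summarize retrieval-augmented generation in one sentence."))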