Spaces:

nurqoneah
/

SeaLLM

Sleeping

nurqoneah committed on Jan 11

Commit

d3ef806

verified ·

1 Parent(s): abebca4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,9 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import os
 from dotenv import load_dotenv
 warnings.filterwarnings("ignore")
 load_dotenv()
@@ -64,16 +67,28 @@ def create_llm():
     # llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
-    return HuggingFaceHub(
-        repo_id=MODEL_NAME,
-        model_kwargs={
-            "temperature": 0.7,  # Balanced between creativity and accuracy
-            "max_new_tokens": 1024,
-            "top_p": 0.9,
-            "frequency_penalty": 0.5
-        }
     )
     # return llm
 # Improved prompt template with better context handling and response structure
 PROMPT_TEMPLATE = """

 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import os
 from dotenv import load_dotenv
+from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
 warnings.filterwarnings("ignore")
 load_dotenv()
     # llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+    # return HuggingFaceHub(
+    #     repo_id=MODEL_NAME,
+    #     model_kwargs={
+    #         "temperature": 0.7,  # Balanced between creativity and accuracy
+    #         "max_new_tokens": 1024,
+    #         "top_p": 0.9,
+    #         "frequency_penalty": 0.5
+    #     }
+    # )
+    llm = HuggingFacePipeline.from_model_id(
+        model_id=MODEL_NAME,
+        task="text-generation",
+        pipeline_kwargs=dict(
+            max_new_tokens=512,
+            do_sample=False,
+            repetition_penalty=1.03,
+        ),
     )
     # return llm
+    # chat_model = ChatHuggingFace(llm=llm)
 # Improved prompt template with better context handling and response structure
 PROMPT_TEMPLATE = """