nurqoneah committed (verified)
Commit aaa12a7 · Parent(s): 896577b

Update app.py

Files changed (1): app.py (+31 -31)
app.py CHANGED
@@ -39,41 +39,41 @@ def create_llm():
     """Initialize the language model with optimized parameters"""
 
 
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
-        # quantization_config=bnb_config
-    )
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    # bnb_config = BitsAndBytesConfig(
+    #     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+    # )
+
+    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
+    #     # quantization_config=bnb_config
+    # )
+    # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
-    terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+    # terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
 
-    text_generation_pipeline = pipeline(
-        model=model,
-        tokenizer=tokenizer,
-        task="text-generation",
-        temperature=0.2,
-        do_sample=True,
-        repetition_penalty=1.1,
-        return_full_text=False,
-        max_new_tokens=200,
-        eos_token_id=terminators,
-    )
+    # text_generation_pipeline = pipeline(
+    #     model=model,
+    #     tokenizer=tokenizer,
+    #     task="text-generation",
+    #     temperature=0.2,
+    #     do_sample=True,
+    #     repetition_penalty=1.1,
+    #     return_full_text=False,
+    #     max_new_tokens=200,
+    #     eos_token_id=terminators,
+    # )
 
-    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+    # llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
 
-    # return HuggingFaceHub(
-    #     repo_id=MODEL_NAME,
-    #     model_kwargs={
-    #         "temperature": 0.7, # Balanced between creativity and accuracy
-    #         "max_new_tokens": 1024,
-    #         "top_p": 0.9,
-    #         "frequency_penalty": 0.5
-    #     }
-    # )
-    return llm
+    return HuggingFaceHub(
+        repo_id=MODEL_NAME,
+        model_kwargs={
+            "temperature": 0.7, # Balanced between creativity and accuracy
+            "max_new_tokens": 1024,
+            "top_p": 0.9,
+            "frequency_penalty": 0.5
+        }
+    )
+    # return llm
 
 # Improved prompt template with better context handling and response structure
 PROMPT_TEMPLATE = """