isayahc committed on
Commit
66b1dc7
·
1 Parent(s): e34519b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -5,6 +5,8 @@ import boto3
5
  from botocore import UNSIGNED
6
  from botocore.client import Config
7
 
 
 
8
 
9
  from huggingface_hub import AsyncInferenceClient
10
 
@@ -18,6 +20,7 @@ from langchain.chains import RetrievalQA
18
  from langchain.prompts import ChatPromptTemplate
19
  from langchain.document_loaders import WebBaseLoader
20
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
 
21
 
22
  from transformers import AutoModel
23
 
@@ -51,11 +54,21 @@ model = AutoModelForCausalLM.from_pretrained(model_id)
51
  # model = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")
52
 
53
 
 
 
 
 
 
 
 
 
 
 
54
  tokenizer = AutoTokenizer.from_pretrained(model_id)
55
  # model = AutoModelForCausalLM.from_pretrained(model_id)
56
 
57
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
58
- hf = HuggingFacePipeline(pipeline=pipe)
59
 
60
 
61
  print( "initalized second model")
 
5
  from botocore import UNSIGNED
6
  from botocore.client import Config
7
 
8
+ import torch
9
+
10
 
11
  from huggingface_hub import AsyncInferenceClient
12
 
 
20
  from langchain.prompts import ChatPromptTemplate
21
  from langchain.document_loaders import WebBaseLoader
22
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
23
+ from langchain.llms import CTransformers
24
 
25
  from transformers import AutoModel
26
 
 
54
  # model = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")
55
 
56
 
57
+ llm = CTransformers(
58
+ model="TheBloke/zephyr-7B-beta-GGUF",
59
+ model_type="mistral",
60
+ max_new_tokens=4384,
61
+ temperature=0.2,
62
+ repetition_penalty=1.13,
63
+ device=device # Set the device explicitly during model initialization
64
+ )
65
+
66
+
67
  tokenizer = AutoTokenizer.from_pretrained(model_id)
68
  # model = AutoModelForCausalLM.from_pretrained(model_id)
69
 
70
+ # pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
71
+ # hf = HuggingFacePipeline(pipeline=pipe)
72
 
73
 
74
  print( "initalized second model")