Runtime error
Update app.py
app.py
CHANGED
@@ -5,6 +5,8 @@ import boto3
 from botocore import UNSIGNED
 from botocore.client import Config
 
+import torch
+
 
 from huggingface_hub import AsyncInferenceClient
 
@@ -18,6 +20,7 @@ from langchain.chains import RetrievalQA
 from langchain.prompts import ChatPromptTemplate
 from langchain.document_loaders import WebBaseLoader
 from langchain.llms.huggingface_pipeline import HuggingFacePipeline
+from langchain.llms import CTransformers
 
 from transformers import AutoModel
 
@@ -51,11 +54,21 @@ model = AutoModelForCausalLM.from_pretrained(model_id)
 # model = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")
 
 
+llm = CTransformers(
+    model="TheBloke/zephyr-7B-beta-GGUF",
+    model_type="mistral",
+    max_new_tokens=4384,
+    temperature=0.2,
+    repetition_penalty=1.13,
+    device=device  # Set the device explicitly during model initialization
+)
+
+
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # model = AutoModelForCausalLM.from_pretrained(model_id)
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
-hf = HuggingFacePipeline(pipeline=pipe)
+# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
+# hf = HuggingFacePipeline(pipeline=pipe)
 
 
 print( "initalized second model")
|