Docfile committed on
Commit 0fdca50 · 1 Parent(s): 1b7b953

Update app.py

Files changed (1)
  1. app.py +14 -3
app.py CHANGED
@@ -3,6 +3,8 @@ from g4f import Provider, models
 from langchain.llms.base import LLM
 import asyncio
 import nest_asyncio
+from langchain.callbacks.manager import CallbackManager
+from langchain.llms import LlamaCpp
 from llama_index import ServiceContext, LLMPredictor, PromptHelper
 from llama_index.text_splitter import TokenTextSplitter
 from llama_index.node_parser import SimpleNodeParser
@@ -10,6 +12,16 @@ from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbed
 from llama_index import SimpleDirectoryReader, VectorStoreIndex
 from gradio import Interface
 nest_asyncio.apply()
+from huggingface_hub import hf_hub_download
+
+model_name_or_path = "hlhr202/llama-7B-ggml-int4"
+model_basename = "ggml-model-q4_0.bin"  # the model is in bin format
+
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
+n_batch = 256
+
 
 embed_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
                                             model_kwargs={"device": "cpu"})
@@ -25,9 +37,8 @@ prompt_helper = PromptHelper(
 from langchain_g4f import G4FLLM
 
 async def main(question):
-    llm: LLM = G4FLLM(
-        model=models.gpt_35_turbo,
-        provider=Provider.DeepAi,
+    llm = LlamaCpp(
+        model_path=model_path, callbacks=[StreamingStdOutCallbackHandler()]
     )
     from llama_index.llms import LangChainLLM
 
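
As committed, the new main() body references StreamingStdOutCallbackHandler without importing it, the CallbackManager import goes unused, and the n_gpu_layers / n_batch values are defined but never passed anywhere. Below is a minimal sketch of how the LlamaCpp setup from this diff could be wired end to end; the handler import path and the choice to pass n_gpu_layers / n_batch into the LlamaCpp constructor are assumptions on my part, not part of the commit.

from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
# Assumption: the diff imports CallbackManager but actually needs this handler.
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Same checkpoint the diff downloads: a 4-bit GGML quantization of LLaMA 7B.
model_path = hf_hub_download(
    repo_id="hlhr202/llama-7B-ggml-int4",
    filename="ggml-model-q4_0.bin",
)

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=40,  # defined in the diff but never used; passing it here is an assumption
    n_batch=256,
    callbacks=[StreamingStdOutCallbackHandler()],  # stream tokens to stdout as they are generated
)

print(llm("Q: What is the capital of France? A:"))

Note that n_gpu_layers only has an effect when llama-cpp-python is built with GPU (CUDA/Metal) support; on a CPU-only build the model runs entirely on the CPU regardless of this value.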