import gradio as gr
import nest_asyncio
from g4f import Provider, models
from huggingface_hub import hf_hub_download
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_g4f import G4FLLM
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms import LangChainLLM

# g4f issues async requests internally; nest_asyncio lets its event loop
# run inside the loop Gradio already manages.
nest_asyncio.apply()

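# Optional: fetch a local llama.cpp model from the Hugging Face Hub. The
# download below is commented out, so these settings are currently unused.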
model_name_or_path = "hlhr202/llama-7B-ggml-int4"
model_basename = "ggml-model-q4_0.bin"  # the model ships in GGML .bin format

# model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

n_gpu_layers = 40  # tune to your model and available GPU VRAM
n_batch = 256


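# Instructor-XL embeddings, computed locally on CPU, for document retrieval.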
embed_model = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": "cpu"},
)
"""
node_parser = SimpleNodeParser.from_defaults(text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20))
prompt_helper = PromptHelper(
  context_window=4096,
  num_output=256,
  chunk_overlap_ratio=0.1,
  chunk_size_limit=None
)
"""
llm = G4FLLM(
    model=models.gpt_35_turbo,
    provider=Provider.Acytoo,
)
llm = LangChainLLM(llm=llm)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

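# Build an in-memory vector index over everything in ./data; each question is
# answered by retrieving the most similar chunks and asking the LLM.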
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

async def main(question):
    query_engine = index.as_query_engine(service_context=service_context)
    response = query_engine.query(question)
    print(response)
    return str(response)  # Gradio's text output expects a string, not a Response object

iface = gr.Interface(fn=main, inputs="text", outputs="text")
iface.launch()