import asyncio

import gradio as gr
import nest_asyncio
from g4f import Provider, models
from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import LlamaCpp
from langchain.llms.base import LLM
from langchain_g4f import G4FLLM
from llama_index import (PromptHelper, ServiceContext, SimpleDirectoryReader,
                         VectorStoreIndex)
from llama_index.llms import LangChainLLM
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter

# Some imports above (LlamaCpp, CallbackManager, hf_hub_download, PromptHelper,
# TokenTextSplitter, SimpleNodeParser) support optional paths kept in comments below.

# Allow nested event loops; Gradio and the async handler share one loop.
nest_asyncio.apply()
# Settings for the optional local LlamaCpp backend (unused by the default g4f
# path below; uncomment the hf_hub_download line to fetch the weights).
model_name_or_path = "hlhr202/llama-7B-ggml-int4"
model_basename = "ggml-model-q4_0.bin"  # the model ships in GGML .bin format
# model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
n_gpu_layers = 40  # Tune to your model and available GPU VRAM.
n_batch = 256  # Tokens evaluated per batch; higher values use more memory.
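# A minimal sketch of the local LlamaCpp path these settings appear intended
# for (an assumption, not part of the active pipeline): it would replace the
# g4f LLM constructed below and needs the model_path download uncommented.
# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# local_llm = LlamaCpp(
#     model_path=model_path,      # path returned by hf_hub_download above
#     n_gpu_layers=n_gpu_layers,  # layers offloaded to the GPU
#     n_batch=n_batch,
#     callback_manager=callback_manager,
#     verbose=True,
# )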
# Instructor embeddings on CPU (requires the InstructorEmbedding and
# sentence-transformers packages).
embed_model = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": "cpu"},
)
"""
node_parser = SimpleNodeParser.from_defaults(text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20))
prompt_helper = PromptHelper(
context_window=4096,
num_output=256,
chunk_overlap_ratio=0.1,
chunk_size_limit=None
)
"""
# Build the LangChain-compatible g4f LLM, then wrap it for llama_index.
# (The original `llm = LLM = G4FLLM(...)` rebound the imported LLM class;
# a type annotation was clearly intended.)
llm: LLM = G4FLLM(
    model=models.gpt_35_turbo,
    provider=Provider.Acytoo,
)
llm = LangChainLLM(llm=llm)
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)
# Ingest every file under ./data and build an in-memory vector index.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
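# Rebuilding the index on every launch is slow for large corpora; llama_index
# can persist it to disk instead (a sketch, assuming the 0.8-era storage API):
# index.storage_context.persist(persist_dir="./storage")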
async def main(question):
    query_engine = index.as_query_engine(service_context=service_context)
    response = query_engine.query(question)
    print(response)
    return str(response)  # Gradio expects a string, not a Response object
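# The handler can also be exercised without the UI; since nest_asyncio is
# applied, the running loop can be reused (question text is illustrative):
# print(asyncio.get_event_loop().run_until_complete(main("What is in the data folder?")))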
iface = gr.Interface(fn=main, inputs="text", outputs="text")
iface.launch()