|
import gradio as gr
import nest_asyncio

from g4f import Provider, models
from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import LlamaCpp
from langchain.llms.base import LLM
from langchain_g4f import G4FLLM
from llama_index import PromptHelper, ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms import LangChainLLM
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter

# CallbackManager, StreamingStdOutCallbackHandler, and LlamaCpp are only
# referenced by the optional local-model sketch below; PromptHelper,
# SimpleNodeParser, and TokenTextSplitter only by the disabled chunking block.

# llama_index and g4f both schedule work on the asyncio event loop;
# nest_asyncio allows re-entrant loops so they can run under gradio.
nest_asyncio.apply()
|
|
|
# Optional local-model path: fetch a 4-bit GGML LLaMA-7B checkpoint from the
# Hugging Face Hub. The g4f pipeline below never loads this file; it only
# matters if you enable the LlamaCpp sketch underneath.
model_name_or_path = "hlhr202/llama-7B-ggml-int4"
model_basename = "ggml-model-q4_0.bin"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

n_gpu_layers = 40  # layers to offload to the GPU (LlamaCpp only)
n_batch = 256      # tokens processed per batch (LlamaCpp only)
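# A minimal sketch of how the downloaded model and the two knobs above would
# be wired up, assuming langchain's LlamaCpp wrapper. This is a commented-out
# alternative to the remote g4f LLM used in main(), not part of the live path:
#
# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# local_llm = LlamaCpp(
#     model_path=model_path,
#     n_gpu_layers=n_gpu_layers,
#     n_batch=n_batch,
#     callback_manager=callback_manager,
#     verbose=True,  # stream tokens to stdout as they are generated
# )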
|
|
|
|
|
# Embeddings are computed locally on CPU with instructor-xl, so only the
# final LLM completion goes through the remote g4f provider.
embed_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
                                            model_kwargs={"device": "cpu"})
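# Note: instructor-xl is a multi-gigabyte download; hkunlp/instructor-large
# should work as a lighter drop-in if memory or bandwidth is tight.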
|
""" |
|
node_parser = SimpleNodeParser.from_defaults(text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)) |
|
prompt_helper = PromptHelper( |
|
context_window=4096, |
|
num_output=256, |
|
chunk_overlap_ratio=0.1, |
|
chunk_size_limit=None |
|
) |
|
""" |
|
|
|
|
def main(question):
    # g4f exposes free gpt-3.5-turbo endpoints behind langchain's LLM
    # interface. Providers come and go; if Acytoo stops working, pick
    # another from g4f.Provider.
    llm: LLM = G4FLLM(
        model=models.gpt_35_turbo,
        provider=Provider.Acytoo,
    )

    # Adapt the langchain LLM to the interface llama_index expects.
    llm = LangChainLLM(llm=llm)

    service_context = ServiceContext.from_defaults(llm=llm,
                                                   embed_model=embed_model)

    # Load everything under data/, embed it, and answer the question
    # against the resulting vector index.
    documents = SimpleDirectoryReader("data/").load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    query_engine = index.as_query_engine()
    response = query_engine.query(question)
    print(response)
    return str(response)  # gradio's text output expects a string, not a Response
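# Note: main() rebuilds the whole index on every request, which is fine for a
# demo but slow in practice. A hypothetical restructuring would build it once
# at module load and reuse it:
#
# index = build_index_once()  # hypothetical helper wrapping the steps above
# def main(question):
#     return str(index.as_query_engine().query(question))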
|
|
|
# Single text box in, single text box out.
iface = gr.Interface(fn=main, inputs="text", outputs="text")
iface.launch()
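# launch() prints a local URL (http://127.0.0.1:7860 by default); open it and
# ask a question about the files in data/. Pass share=True for a public link.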
|
|