import gradio as gr
from g4f import Provider, models
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain_g4f import G4FLLM
from paperqa import Docs

# LLM backed by the free gpt-3.5-turbo endpoint exposed through g4f.
llm: LLM = G4FLLM(
    model=models.gpt_35_turbo,
    provider=Provider.Aichat,
)

# Local sentence-transformers embeddings (run on CPU, no API key required).
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Index the PDF so it can be queried.
docs = Docs(llm=llm, embeddings=embeddings)
docs.add_url(
    "https://33bbf3d5-c3fe-409d-a723-d22ea129e9a0.usrfiles.com/ugd/33bbf3_a21b940230be4adbb8be48927b9dc92b.pdf"
)

answer = docs.query("What does article 114 say?")
print(answer)


def ask(question):
    # Query the indexed document with whatever the user typed,
    # instead of echoing a precomputed answer.
    return str(docs.query(question))


gr.Interface(fn=ask, inputs=gr.Textbox(), outputs=gr.Textbox()).launch()
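
# --- Alternative: fully local inference via llama.cpp ---
# The original script's unused imports (hf_hub_download, LlamaCpp,
# LlamaCppEmbeddings, streaming callbacks) and its n_gpu_layers / n_batch
# settings point at a local-model variant. A minimal sketch of that path
# follows; the repo_id and filename are placeholder assumptions, so swap in
# whatever GGUF model you actually use.

from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 256

# Download a quantized GGUF model (placeholder repo/filename).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
    filename="llama-2-7b-chat.Q4_K_M.gguf",
)

# Make sure the model path is correct for your system!
local_llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)
local_embeddings = LlamaCppEmbeddings(model_path=model_path)

# Then build the index exactly as above:
# docs = Docs(llm=local_llm, embeddings=local_embeddings)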