import gradio as gr
import paperscraper
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
from langchain.embeddings import LlamaCppEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from paperqa import Docs

# Download the quantized GGML model from the Hugging Face Hub.
model_name_or_path = "hlhr202/llama-7B-ggml-int4"
model_basename = "ggml-model-q4_0.bin"  # the model is in GGML .bin format
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 256  # Tokens processed in parallel; lower this if you hit memory limits.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callbacks=[StreamingStdOutCallbackHandler()],
)
embeddings = LlamaCppEmbeddings(model_path=model_path)
docs = Docs(llm=llm, embeddings=embeddings)

# Fetch a couple of papers matching the keyword search and index them.
keyword_search = 'bispecific antibody manufacture'
papers = paperscraper.search_papers(keyword_search, limit=2)
for path, data in papers.items():
    try:
        docs.add(path, chunk_chars=500)
    except ValueError as e:
        print('Could not read', path, e)

answer = docs.query("What manufacturing challenges are unique to bispecific antibodies?")
print(answer)

# Expose the indexed papers through a simple Gradio question-answering UI.
def ask(question):
    return str(docs.query(question))

gr.Interface(fn=ask, inputs=gr.Textbox(), outputs=gr.Textbox()).launch()