import gradio as gr
import paperscraper
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
from langchain.embeddings import LlamaCppEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from paperqa import Docs

# Download the quantized GGML model from the Hugging Face Hub.
model_name_or_path = "hlhr202/llama-7B-ggml-int4"
model_basename = "ggml-model-q4_0.bin"  # the model is in GGML .bin format
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 256  # Tokens processed in parallel; lower this if you hit memory limits.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callbacks=[StreamingStdOutCallbackHandler()],
)
embeddings = LlamaCppEmbeddings(model_path=model_path)
docs = Docs(llm=llm, embeddings=embeddings)

# Fetch a couple of papers matching the keyword search and index them.
keyword_search = 'bispecific antibody manufacture'
papers = paperscraper.search_papers(keyword_search, limit=2)
for path, data in papers.items():
    try:
        docs.add(path, chunk_chars=500)
    except ValueError as e:
        print('Could not read', path, e)

answer = docs.query("What manufacturing challenges are unique to bispecific antibodies?")
print(answer)

# Expose the indexed papers through a simple Gradio question-answering UI.
def ask(question):
    return str(docs.query(question))

gr.Interface(fn=ask, inputs=gr.Textbox(), outputs=gr.Textbox()).launch()