tdecae committed
Commit 57faddd · verified · 1 Parent(s): c3aaf4b

Update app.py

Files changed (1)
  1. app.py +36 -10
app.py CHANGED
@@ -5,15 +5,15 @@ from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.llms import HuggingFacePipeline
-from transformers import pipeline
+from langchain.llms.base import LLM
+from huggingface_hub import InferenceClient
 import gradio as gr
 
 # workaround for sqlite in HF spaces
 __import__('pysqlite3')
 sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 
-# 📄 Load documents from multiple_docs
+# 📄 Load documents
 docs = []
 for f in os.listdir("multiple_docs"):
     if f.endswith(".pdf"):
@@ -26,11 +26,10 @@ for f in os.listdir("multiple_docs"):
         loader = TextLoader(os.path.join("multiple_docs", f))
     docs.extend(loader.load())
 
-# 🔗 Split into smaller chunks
+# 🔗 Split into chunks
 splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
 docs = splitter.split_documents(docs)
 
-# 🧬 Prepare texts and metadata
 texts = [doc.page_content for doc in docs]
 metadatas = [{"id": i} for i in range(len(texts))]
 
@@ -45,12 +44,39 @@ vectorstore = Chroma(
 vectorstore.add_texts(texts=texts, metadatas=metadatas)
 vectorstore.persist()
 
-# 🤖 Load free LLM using pipeline + wrap in HuggingFacePipeline
-model_name = "google/flan-t5-large"  # or flan-t5-base for faster inference
-hf_pipeline = pipeline("text2text-generation", model=model_name, device=-1)  # CPU
-llm = HuggingFacePipeline(pipeline=hf_pipeline)
-
-# 🔗 Create conversational chain
+# 🔐 Get HF token from env variable
+HF_API_KEY = os.getenv("HF_API_KEY")
+if HF_API_KEY is None:
+    raise ValueError("HF_API_KEY environment variable is not set.")
+
+# 🤖 Use HuggingFace Inference API (cloud) for LLM
+HF_MODEL = "deepseek-ai/deepseek-llm-7b-instruct"  # you can change this to another model if you like
+client = InferenceClient(token=HF_API_KEY)
+
+# 🔷 Wrap HF client into LangChain LLM interface
+class HuggingFaceInferenceLLM(LLM):
+    """LLM that queries the HuggingFace Inference API."""
+
+    model: str = HF_MODEL
+    client: InferenceClient = client
+
+    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
+        response = self.client.text_generation(
+            prompt,
+            model=self.model,
+            max_new_tokens=512,
+            temperature=0.7,
+            do_sample=True,
+        )
+        return response
+
+    @property
+    def _llm_type(self) -> str:
+        return "huggingface_inference_api"
+
+llm = HuggingFaceInferenceLLM()
+
+# 🔗 Conversational chain
 chain = ConversationalRetrievalChain.from_llm(
     llm,
     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
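
For reference, a minimal sketch of how the resulting chain might be called once the app is running. This is a sketch assuming the standard ConversationalRetrievalChain calling convention; the question text and the empty chat history are illustrative, not part of the commit.

    # Hypothetical usage sketch, not part of app.py.
    # The chain expects a question plus the prior (question, answer) turns.
    chat_history = []
    query = "What do these documents cover?"  # illustrative question
    result = chain({"question": query, "chat_history": chat_history})
    print(result["answer"])
    # Carry the turn forward so follow-up questions keep conversational context.
    chat_history.append((query, result["answer"]))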
 
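
And a standalone sanity check of the raw Inference API call the new wrapper makes, assuming HF_API_KEY is set in the environment and the model is served by the hosted Inference API. InferenceClient.text_generation takes the prompt as its first positional argument and returns the generated text as a string.

    # Hypothetical sanity check, separate from app.py; the prompt is illustrative.
    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.getenv("HF_API_KEY"))
    text = client.text_generation(
        "Briefly explain retrieval-augmented generation.",
        model="deepseek-ai/deepseek-llm-7b-instruct",
        max_new_tokens=64,
    )
    print(text)

Verifying the token and model with an isolated call like this before wiring the wrapper into the chain makes failures easier to localize.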