random2222 committed
Commit 23a5488 · verified · 1 Parent(s): 02a4a64

Update app.py

Files changed (1)
  1. app.py +33 -24
app.py CHANGED
@@ -1,4 +1,6 @@
 import os
+import gc
+import torch
 import gradio as gr
 from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
 from langchain_text_splitters import CharacterTextSplitter
@@ -6,14 +8,12 @@ from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
-from transformers import pipeline, AutoTokenizer
-
-# Optional but recommended addition
+from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import login
-import os

+# Handle HF token securely
 if os.environ.get("HF_TOKEN"):
-    login(token=os.environ["HF_TOKEN"])  # No hardcoded tokens!
+    login(token=os.environ["HF_TOKEN"])

 def load_documents(file_path="study_materials"):
     documents = []
@@ -29,46 +29,55 @@ def load_documents(file_path="study_materials"):

 def create_qa_system():
     try:
-        # Load documents
+        # Load and split documents
         documents = load_documents()
         if not documents:
             raise ValueError("📚 No study materials found")
-
-        # Text splitting
+
         text_splitter = CharacterTextSplitter(
             chunk_size=1100,
             chunk_overlap=200,
             separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
-
-        # Embeddings
+
+        # Create embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )

         # Vector store
         db = FAISS.from_documents(texts, embeddings)
-
-        # LLM setup with proper LangChain wrapper
-        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
+
+        # Quantization config
+        quant_config = BitsAndBytesConfig(
+            load_in_8bit=True,
+            llm_int8_threshold=6.0
+        )
+
+        # LLM setup with optimizations
+        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
         pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-large",
-            max_length=600,
-            temperature=0.7,
             tokenizer=tokenizer,
+            max_length=400,
+            temperature=0.7,
             do_sample=True,
             top_k=50,
-            device=-1
+            device=-1,  # Force CPU usage
+            model_kwargs={
+                "torch_dtype": torch.float16,
+                "quantization_config": quant_config
+            }
         )
-
-        # Wrap pipeline in LangChain component
-        llm = HuggingFacePipeline(pipeline=pipe)
-
-        # Create QA chain
+
+        # Memory cleanup
+        gc.collect()
+
+        # Create QA system
         return RetrievalQA.from_llm(
-            llm=llm,
+            llm=HuggingFacePipeline(pipeline=pipe),
             retriever=db.as_retriever(search_kwargs={"k": 3}),
             return_source_documents=True
         )
@@ -86,7 +95,7 @@ def ask_question(question, history):
     try:
         result = qa.invoke({"query": question})
         answer = result["result"]
-        sources = list({doc.metadata['source'] for doc in result['source_documents']})
+        sources = {os.path.basename(doc.metadata['source']) for doc in result['source_documents']}
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
         return f"Error: {str(e)[:150]}"
@@ -96,4 +105,4 @@ gr.ChatInterface(
     title="Study Assistant",
     description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
     theme="soft"
-).launch()
+).launch()
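
A note on this change: bitsandbytes 8-bit loading (load_in_8bit=True) generally requires a CUDA GPU, so pairing quantization_config with device=-1 (CPU) will typically fail at model load, and torch.float16 is a poor fit for CPU inference. Below is a minimal sketch of a guarded setup under those assumptions; the branching and variable names are illustrative, not part of the commit, and it assumes transformers, accelerate, and bitsandbytes are installed.

import torch
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig

model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Generation settings matching the values used in the commit.
gen_kwargs = dict(max_length=400, do_sample=True, temperature=0.7, top_k=50)

if torch.cuda.is_available():
    # 8-bit weights need CUDA; let accelerate place the model rather than
    # passing an explicit device index alongside quantization.
    pipe = pipeline(
        "text2text-generation",
        model=model_id,
        tokenizer=tokenizer,
        device_map="auto",
        model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_8bit=True)},
        **gen_kwargs,
    )
else:
    # CPU fallback: default float32 weights, no quantization.
    pipe = pipeline(
        "text2text-generation",
        model=model_id,
        tokenizer=tokenizer,
        device=-1,
        **gen_kwargs,
    )

The fallback keeps the Space usable on CPU-only hardware while still taking the 8-bit path when a GPU is attached; the commit's gc.collect() can follow either branch.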