random2222 committed
Commit c7f06c3 · verified · 1 Parent(s): 7a39638

Update app.py

Files changed (1)
  1. app.py +8 -15
app.py CHANGED
@@ -2,16 +2,16 @@ import os
 import gc
 import torch
 import gradio as gr
-from langchain.document_loaders import PyMuPDFLoader, TextLoader
+from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import FAISS
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
-from langchain.llms import HuggingFacePipeline
+from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import login
 
-# Handle HF token securely
+# HF Token handling
 if os.environ.get("HF_TOKEN"):
     login(token=os.environ["HF_TOKEN"])
 
@@ -29,7 +29,7 @@ def load_documents(file_path="study_materials"):
 
 def create_qa_system():
     try:
-        # Load and split documents
+        # Load and process documents
         documents = load_documents()
         if not documents:
             raise ValueError("📚 No study materials found")
@@ -49,13 +49,7 @@ def create_qa_system():
         # Vector store
         db = FAISS.from_documents(texts, embeddings)
 
-        # Quantization config
-        quant_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            llm_int8_threshold=6.0
-        )
-
-        # LLM setup with optimizations
+        # LLM setup with CPU optimizations
         tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
         pipe = pipeline(
             "text2text-generation",
@@ -68,14 +62,13 @@ def create_qa_system():
             device=-1,  # Force CPU usage
             model_kwargs={
                 "torch_dtype": torch.float16,
-                "quantization_config": quant_config
+                "low_cpu_mem_usage": True
             }
         )
 
         # Memory cleanup
         gc.collect()
 
-        # Create QA system
         return RetrievalQA.from_llm(
            llm=HuggingFacePipeline(pipeline=pipe),
            retriever=db.as_retriever(search_kwargs={"k": 3}),
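
For context: dropping the BitsAndBytesConfig block is consistent with the pinned device=-1, since bitsandbytes 8-bit quantization generally requires a CUDA GPU, while low_cpu_mem_usage=True is the CPU-appropriate loading option. A minimal usage sketch of the chain built above, assuming app.py exposes create_qa_system() as shown in the diff (the question string and variable names are illustrative; "query" and "result" are the standard RetrievalQA input/output keys):

# Minimal usage sketch, assuming the app.py above is on the import path.
# The question text is illustrative, not part of the commit.
from app import create_qa_system

qa = create_qa_system()
response = qa.invoke({"query": "What topics do the study materials cover?"})
print(response["result"])

On LangChain releases that predate the Runnable interface, qa.run("...") is the equivalent call; the langchain_community imports in this commit imply 0.1+, where invoke() is available.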