random2222 committed on
Commit
8dbabdc
·
verified ·
1 Parent(s): d640554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -19
app.py CHANGED
@@ -6,8 +6,19 @@ from langchain_community.vectorstores import FAISS
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain.chains import RetrievalQA
8
  from langchain_community.llms import HuggingFacePipeline
 
9
  from transformers import pipeline, AutoTokenizer
10
 
 
 
 
 
 
 
 
 
 
 
11
  def load_documents(file_path="study_materials"):
12
  documents = []
13
  for filename in os.listdir(file_path):
@@ -22,45 +33,43 @@ def load_documents(file_path="study_materials"):
22
 
23
  def create_qa_system():
24
  try:
25
- # Load documents
26
  documents = load_documents()
27
  if not documents:
28
- raise ValueError("📚 No study materials found")
29
-
30
- # Text splitting
31
  text_splitter = CharacterTextSplitter(
32
- chunk_size=800,
33
- chunk_overlap=100,
34
  separator="\n\n"
35
  )
36
  texts = text_splitter.split_documents(documents)
37
 
38
- # Embeddings
39
  embeddings = HuggingFaceEmbeddings(
40
  model_name="sentence-transformers/all-MiniLM-L6-v2"
41
  )
42
-
43
- # Vector store
44
  db = FAISS.from_documents(texts, embeddings)
45
 
46
- # LLM setup with proper LangChain wrapper
47
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
48
  pipe = pipeline(
49
  "text2text-generation",
50
  model="google/flan-t5-base",
51
  tokenizer=tokenizer,
52
- max_length=300,
53
- temperature=0.3,
 
 
54
  device=-1
55
  )
56
 
57
- # Wrap pipeline in LangChain component
58
  llm = HuggingFacePipeline(pipeline=pipe)
59
 
60
- # Create QA chain
61
- return RetrievalQA.from_llm(
62
  llm=llm,
63
- retriever=db.as_retriever(search_kwargs={"k": 2}),
 
 
64
  return_source_documents=True
65
  )
66
  except Exception as e:
@@ -77,6 +86,11 @@ def ask_question(question, history):
77
  try:
78
  result = qa.invoke({"query": question})
79
  answer = result["result"]
 
 
 
 
 
80
  sources = list({doc.metadata['source'] for doc in result['source_documents']})
81
  return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
82
  except Exception as e:
@@ -84,7 +98,11 @@ def ask_question(question, history):
84
 
85
  gr.ChatInterface(
86
  ask_question,
87
- title="Study Assistant",
88
- description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
89
- theme="soft"
 
 
 
 
90
  ).launch()
 
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain.chains import RetrievalQA
8
  from langchain_community.llms import HuggingFacePipeline
9
+ from langchain.prompts import PromptTemplate
10
  from transformers import pipeline, AutoTokenizer
11
 
12
+ # Custom prompt for detailed answers
13
+ QA_PROMPT = PromptTemplate(
14
+ template="""Generate a detailed explanation using only this context:
15
+ {context}
16
+
17
+ Question: {question}
18
+ Answer in complete paragraphs with examples:""",
19
+ input_variables=["context", "question"]
20
+ )
21
+
22
  def load_documents(file_path="study_materials"):
23
  documents = []
24
  for filename in os.listdir(file_path):
 
33
 
34
  def create_qa_system():
35
  try:
36
+ # Load and process documents
37
  documents = load_documents()
38
  if not documents:
39
+ raise ValueError("No study materials found")
40
+
 
41
  text_splitter = CharacterTextSplitter(
42
+ chunk_size=1000, # Larger chunks per retrieved passage (was 800)
43
+ chunk_overlap=200,
44
  separator="\n\n"
45
  )
46
  texts = text_splitter.split_documents(documents)
47
 
 
48
  embeddings = HuggingFaceEmbeddings(
49
  model_name="sentence-transformers/all-MiniLM-L6-v2"
50
  )
 
 
51
  db = FAISS.from_documents(texts, embeddings)
52
 
53
+ # Configure for detailed responses
54
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
55
  pipe = pipeline(
56
  "text2text-generation",
57
  model="google/flan-t5-base",
58
  tokenizer=tokenizer,
59
+ max_length=512, # Raise the generation length cap (was 300)
60
+ temperature=0.5, # More creative but focused
61
+ do_sample=True,
62
+ top_k=50,
63
  device=-1
64
  )
65
 
 
66
  llm = HuggingFacePipeline(pipeline=pipe)
67
 
68
+ return RetrievalQA.from_chain_type(
 
69
  llm=llm,
70
+ chain_type="stuff",
71
+ retriever=db.as_retriever(search_kwargs={"k": 3}), # More context
72
+ chain_type_kwargs={"prompt": QA_PROMPT},
73
  return_source_documents=True
74
  )
75
  except Exception as e:
 
86
  try:
87
  result = qa.invoke({"query": question})
88
  answer = result["result"]
89
+
90
+ # Ensure minimum answer length
91
+ if len(answer.split()) < 50: # At least 50 words
92
+ answer += "\n\nFor more details, refer to the source documents."
93
+
94
  sources = list({doc.metadata['source'] for doc in result['source_documents']})
95
  return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
96
  except Exception as e:
 
98
 
99
  gr.ChatInterface(
100
  ask_question,
101
+ title="Detailed Study Assistant",
102
+ description="Ask questions and get comprehensive answers from your materials!",
103
+ examples=[
104
+ "Explain the process of photosynthesis in detail",
105
+ "Describe the causes and effects of climate change with examples",
106
+ "Compare and contrast mitosis and meiosis comprehensively"
107
+ ]
108
  ).launch()