Spaces:

random2222
/

trykro

Sleeping

App Files Community

random2222 committed on Apr 11

Commit

8dbabdc

verified ·

1 Parent(s): d640554

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -19

app.py CHANGED Viewed

@@ -6,8 +6,19 @@ from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer
 def load_documents(file_path="study_materials"):
     documents = []
     for filename in os.listdir(file_path):
@@ -22,45 +33,43 @@ def load_documents(file_path="study_materials"):
 def create_qa_system():
     try:
-        # Load documents
         documents = load_documents()
         if not documents:
-            raise ValueError("📚 No study materials found")
-        # Text splitting
         text_splitter = CharacterTextSplitter(
-            chunk_size=800,
-            chunk_overlap=100,
             separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
-        # Embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
-        # Vector store
         db = FAISS.from_documents(texts, embeddings)
-        # LLM setup with proper LangChain wrapper
         tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
         pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
             tokenizer=tokenizer,
-            max_length=300,
-            temperature=0.3,
             device=-1
         )
-        # Wrap pipeline in LangChain component
         llm = HuggingFacePipeline(pipeline=pipe)
-        # Create QA chain
-        return RetrievalQA.from_llm(
             llm=llm,
-            retriever=db.as_retriever(search_kwargs={"k": 2}),
             return_source_documents=True
         )
     except Exception as e:
@@ -77,6 +86,11 @@ def ask_question(question, history):
     try:
         result = qa.invoke({"query": question})
         answer = result["result"]
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
@@ -84,7 +98,11 @@ def ask_question(question, history):
 gr.ChatInterface(
     ask_question,
-    title="Study Assistant",
-    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
-    theme="soft"
 ).launch()

 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
+from langchain.prompts import PromptTemplate
 from transformers import pipeline, AutoTokenizer
+# Custom prompt for detailed answers
+QA_PROMPT = PromptTemplate(
+    template="""Generate a detailed explanation using only this context:
+{context}
+Question: {question}
+Answer in complete paragraphs with examples:""",
+    input_variables=["context", "question"]
+)
 def load_documents(file_path="study_materials"):
     documents = []
     for filename in os.listdir(file_path):
 def create_qa_system():
     try:
+        # Load and process documents
         documents = load_documents()
         if not documents:
+            raise ValueError("No study materials found")
         text_splitter = CharacterTextSplitter(
+            chunk_size=1000,  # Larger chunks (up from 800)
+            chunk_overlap=200,
             separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
         db = FAISS.from_documents(texts, embeddings)
+        # Configure for detailed responses
         tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
         pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
             tokenizer=tokenizer,
+            max_length=512,  # Longer responses (raised from 300)
+            temperature=0.5,  # More creative but focused
+            do_sample=True,
+            top_k=50,
             device=-1
         )
         llm = HuggingFacePipeline(pipeline=pipe)
+        return RetrievalQA.from_chain_type(
             llm=llm,
+            chain_type="stuff",
+            retriever=db.as_retriever(search_kwargs={"k": 3}),  # More context
+            chain_type_kwargs={"prompt": QA_PROMPT},
             return_source_documents=True
         )
     except Exception as e:
     try:
         result = qa.invoke({"query": question})
         answer = result["result"]
+        # Short answers get a pointer back to the source documents
+        if len(answer.split()) < 50:  # At least 50 words
+            answer += "\n\nFor more details, refer to the source documents."
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
 gr.ChatInterface(
     ask_question,
+    title="Detailed Study Assistant",
+    description="Ask questions and get comprehensive answers from your materials!",
+    examples=[
+        "Explain the process of photosynthesis in detail",
+        "Describe the causes and effects of climate change with examples",
+        "Compare and contrast mitosis and meiosis comprehensively"
+    ]
 ).launch()