Spaces:

random2222
/

trykro

Sleeping

App Files Community

random2222 committed on Apr 11

Commit

87aa840

verified ·

1 Parent(s): 8dbabdc

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -37

app.py CHANGED Viewed

@@ -6,19 +6,8 @@ from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
-from langchain.prompts import PromptTemplate
 from transformers import pipeline, AutoTokenizer
-# Custom prompt for detailed answers
-QA_PROMPT = PromptTemplate(
-    template="""Generate a detailed explanation using only this context:
-{context}
-Question: {question}
-Answer in complete paragraphs with examples:""",
-    input_variables=["context", "question"]
-)
 def load_documents(file_path="study_materials"):
     documents = []
     for filename in os.listdir(file_path):
@@ -36,73 +25,78 @@ def create_qa_system():
         # Load and process documents
         documents = load_documents()
         if not documents:
-            raise ValueError("No study materials found")
         text_splitter = CharacterTextSplitter(
-            chunk_size=1000,  # Increased context window
-            chunk_overlap=200,
             separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
         db = FAISS.from_documents(texts, embeddings)
-        # Configure for detailed responses
         tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
-        pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
             tokenizer=tokenizer,
-            max_length=512,  # Double the response length
-            temperature=0.5,  # More creative but focused
-            do_sample=True,
-            top_k=50,
-            device=-1
         )
-        llm = HuggingFacePipeline(pipeline=pipe)
         return RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
-            retriever=db.as_retriever(search_kwargs={"k": 3}),  # More context
-            chain_type_kwargs={"prompt": QA_PROMPT},
             return_source_documents=True
         )
     except Exception as e:
-        raise gr.Error(f"Error: {str(e)}")
 # Initialize system
 try:
     qa = create_qa_system()
 except Exception as e:
-    print(f"Startup failed: {str(e)}")
     raise
 def ask_question(question, history):
     try:
-        result = qa.invoke({"query": question})
         answer = result["result"]
-        # Ensure minimum answer length
-        if len(answer.split()) < 50:  # At least 50 words
-            answer += "\n\nFor more details, refer to the source documents."
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
         return f"Error: {str(e)[:150]}"
 gr.ChatInterface(
     ask_question,
-    title="Detailed Study Assistant",
-    description="Ask questions and get comprehensive answers from your materials!",
     examples=[
-        "Explain the process of photosynthesis in detail",
-        "Describe the causes and effects of climate change with examples",
-        "Compare and contrast mitosis and meiosis comprehensively"
     ]
 ).launch()

 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer
 def load_documents(file_path="study_materials"):
     documents = []
     for filename in os.listdir(file_path):
         # Load and process documents
         documents = load_documents()
         if not documents:
+            raise ValueError("❗ No documents found in 'study_materials' folder")
+        # Document processing
         text_splitter = CharacterTextSplitter(
+            chunk_size=800,
+            chunk_overlap=100,
             separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
+        # Local embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
+        # Create vector store
         db = FAISS.from_documents(texts, embeddings)
+        # Configure local LLM
         tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
+        local_pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
             tokenizer=tokenizer,
+            max_length=400,  # Increased response length
+            temperature=0.4,
+            device=-1  # Force CPU
         )
+        # LangChain integration
+        llm = HuggingFacePipeline(pipeline=local_pipe)
         return RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
+            retriever=db.as_retriever(search_kwargs={"k": 3}),
             return_source_documents=True
         )
     except Exception as e:
+        raise gr.Error(f"Setup Error: {str(e)}")
 # Initialize system
 try:
     qa = create_qa_system()
 except Exception as e:
+    print(f"Startup Failed: {str(e)}")
     raise
 def ask_question(question, history):
     try:
+        result = qa({"query": question})
         answer = result["result"]
+        # Enforce minimum answer length
+        min_words = 75
+        if len(answer.split()) < min_words:
+            answer += f"\n\n[Note: This answer is shorter than {min_words} words. Consider rephrasing your question for more details.]"
+        # Show sources
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
         return f"Error: {str(e)[:150]}"
+# Launch interface
 gr.ChatInterface(
     ask_question,
+    title="Local Study Assistant",
+    description="100% local AI - No APIs required! Upload PDF/TXT files in 'study_materials' folder",
     examples=[
+        "Explain the key concepts from Chapter 4 in detail",
+        "What are the three main points made in section 2.3?",
+        "Compare and contrast the theories presented in pages 50-60"
     ]
 ).launch()