Spaces:

random2222
/

trykro

Sleeping

App Files Community

random2222 committed on Apr 11

Commit

d640554

verified ·

1 Parent(s): 59d35cc

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -29

app.py CHANGED Viewed

@@ -5,10 +5,10 @@ from langchain_text_splitters import CharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from transformers import pipeline, AutoTokenizer
 def load_documents(file_path="study_materials"):
-    # Supports both PDF and TXT files
     documents = []
     for filename in os.listdir(file_path):
         path = os.path.join(file_path, filename)
@@ -22,71 +22,69 @@ def load_documents(file_path="study_materials"):
 def create_qa_system():
     try:
-        # 1. Load study materials
         documents = load_documents()
         if not documents:
-            raise ValueError("📚 No PDF/TXT files found in 'study_materials' folder")
-        # 2. Smart text splitting for educational content
         text_splitter = CharacterTextSplitter(
-            chunk_size=800,  # Optimized for textbook content
             chunk_overlap=100,
-            separator="\n\n"  # Preserve paragraph structure
         )
         texts = text_splitter.split_documents(documents)
-        # 3. Educational-focused embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
-        # 4. Create knowledge base
         db = FAISS.from_documents(texts, embeddings)
-        # 5. Configure student-friendly AI
-        qa_pipeline = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
-            tokenizer=AutoTokenizer.from_pretrained("google/flan-t5-base"),
-            max_length=300,  # Longer answers for explanations
-            temperature=0.3,  # Balance creativity/facts
-            device=-1  # Force CPU usage
         )
-        return RetrievalQA.from_chain_type(
-            llm=qa_pipeline,
-            chain_type="stuff",
             retriever=db.as_retriever(search_kwargs={"k": 2}),
             return_source_documents=True
         )
     except Exception as e:
-        raise gr.Error(f"🚨 Study Assistant Setup Failed: {str(e)}")
 # Initialize system
 try:
     qa = create_qa_system()
 except Exception as e:
-    print(f"Critical Error: {str(e)}")
     raise
 def ask_question(question, history):
     try:
-        result = qa({"query": question})
         answer = result["result"]
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
-        return f"❌ Error: {str(e)[:150]}"
-# Student-friendly interface
 gr.ChatInterface(
     ask_question,
-    title="Study Buddy AI",
-    description="Ask questions about your course materials!",
-    examples=[
-        "Explain the key points from Chapter 3",
-        "What's the difference between mitosis and meiosis?",
-        "List the main causes of World War II"
-    ],
     theme="soft"
 ).launch()

 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
+from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer
 def load_documents(file_path="study_materials"):
     documents = []
     for filename in os.listdir(file_path):
         path = os.path.join(file_path, filename)
 def create_qa_system():
     try:
+        # Load documents
         documents = load_documents()
         if not documents:
+            raise ValueError("📚 No study materials found")
+        # Text splitting
         text_splitter = CharacterTextSplitter(
+            chunk_size=800,
             chunk_overlap=100,
+            separator="\n\n"
         )
         texts = text_splitter.split_documents(documents)
+        # Embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
+        # Vector store
         db = FAISS.from_documents(texts, embeddings)
+        # LLM setup with proper LangChain wrapper
+        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
+        pipe = pipeline(
             "text2text-generation",
             model="google/flan-t5-base",
+            tokenizer=tokenizer,
+            max_length=300,
+            temperature=0.3,
+            device=-1
         )
+        # Wrap pipeline in LangChain component
+        llm = HuggingFacePipeline(pipeline=pipe)
+        # Create QA chain
+        return RetrievalQA.from_llm(
+            llm=llm,
             retriever=db.as_retriever(search_kwargs={"k": 2}),
             return_source_documents=True
         )
     except Exception as e:
+        raise gr.Error(f"Error: {str(e)}")
 # Initialize system
 try:
     qa = create_qa_system()
 except Exception as e:
+    print(f"Startup failed: {str(e)}")
     raise
 def ask_question(question, history):
     try:
+        result = qa.invoke({"query": question})
         answer = result["result"]
         sources = list({doc.metadata['source'] for doc in result['source_documents']})
         return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
     except Exception as e:
+        return f"Error: {str(e)[:150]}"
 gr.ChatInterface(
     ask_question,
+    title="Study Assistant",
+    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
     theme="soft"
 ).launch()